1 This patch implements a feature that allows users of the ext4 fs (e.g. Lustre)
2 to store extra data in an ext4 dirent.
3 The data is stored in the ext4 dirent after the file name; this space is accounted for
4 in de->rec_len. The flag EXT4_DIRENT_LUFID is added to d_type if extra data is present.
7 The patch makes use of dentry->d_fsdata to pass the fid to ext4, so no
8 changes to the ext4_add_entry() interface are required.
10 Index: b/fs/ext4/dir.c
11 ===================================================================
14 @@ -53,11 +53,18 @@ const struct file_operations ext4_dir_op
16 static unsigned char get_dtype(struct super_block *sb, int filetype)
18 + int fl_index = filetype & EXT4_FT_MASK;
20 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
21 - (filetype >= EXT4_FT_MAX))
22 + (fl_index >= EXT4_FT_MAX))
25 - return (ext4_filetype_table[filetype]);
26 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
27 + return (ext4_filetype_table[fl_index]);
29 + return (ext4_filetype_table[fl_index]) |
30 + (filetype & EXT4_DIRENT_LUFID);
35 @@ -69,11 +76,11 @@ int ext4_check_dir_entry (const char * f
36 const char * error_msg = NULL;
37 const int rlen = ext4_rec_len_from_disk(de->rec_len);
39 - if (rlen < EXT4_DIR_REC_LEN(1))
40 + if (rlen < __EXT4_DIR_REC_LEN(1))
41 error_msg = "rec_len is smaller than minimal";
42 else if (rlen % 4 != 0)
43 error_msg = "rec_len % 4 != 0";
44 - else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
45 + else if (rlen < EXT4_DIR_REC_LEN(de))
46 error_msg = "rec_len is too small for name_len";
47 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
48 error_msg = "directory entry across blocks";
49 @@ -175,7 +182,7 @@ revalidate:
50 * failure will be detected in the
51 * dirent test below. */
52 if (ext4_rec_len_from_disk(de->rec_len)
53 - < EXT4_DIR_REC_LEN(1))
54 + < __EXT4_DIR_REC_LEN(1))
56 i += ext4_rec_len_from_disk(de->rec_len);
58 @@ -335,12 +342,17 @@ int ext4_htree_store_dirent(struct file
59 struct fname * fname, *new_fn;
60 struct dir_private_info *info;
64 info = (struct dir_private_info *) dir_file->private_data;
65 p = &info->root.rb_node;
67 /* Create and allocate the fname structure */
68 - len = sizeof(struct fname) + dirent->name_len + 1;
69 + if (dirent->file_type & EXT4_DIRENT_LUFID)
70 + extra_data = ext4_get_dirent_data_len(dirent);
72 + len = sizeof(struct fname) + dirent->name_len + extra_data;
74 new_fn = kzalloc(len, GFP_KERNEL);
77 @@ -349,7 +361,7 @@ int ext4_htree_store_dirent(struct file
78 new_fn->inode = le32_to_cpu(dirent->inode);
79 new_fn->name_len = dirent->name_len;
80 new_fn->file_type = dirent->file_type;
81 - memcpy(new_fn->name, dirent->name, dirent->name_len);
82 + memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data);
83 new_fn->name[dirent->name_len] = 0;
86 Index: b/fs/ext4/ext4.h
87 ===================================================================
90 @@ -771,6 +771,7 @@ static inline int ext4_valid_inum(struct
91 #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
92 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100
93 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
94 +#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000
96 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
97 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
98 @@ -779,7 +780,9 @@ static inline int ext4_valid_inum(struct
99 EXT4_FEATURE_INCOMPAT_EXTENTS| \
100 EXT4_FEATURE_INCOMPAT_64BIT| \
101 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
102 - EXT4_FEATURE_INCOMPAT_MMP)
103 + EXT4_FEATURE_INCOMPAT_MMP| \
104 + EXT4_FEATURE_INCOMPAT_DIRDATA)
106 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
107 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
108 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
109 @@ -847,6 +850,43 @@ struct ext4_dir_entry_2 {
110 #define EXT4_FT_SYMLINK 7
112 #define EXT4_FT_MAX 8
113 +#define EXT4_FT_MASK 0xf
115 +#if EXT4_FT_MAX > EXT4_FT_MASK
116 +#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
120 + * d_type has 4 unused bits, so it can hold four types of data. These different
121 + * types of data (e.g. Lustre data, high 32 bits of a 64-bit inode number) can be
122 + * stored, in flag order, after the file name in an ext4 dirent.
125 + * This flag is added to d_type if the ext4 dirent has extra data after the
126 + * filename. The data length is variable and is stored in the first byte
127 + * of the data. The data starts after the filename's NUL byte.
128 + * This is used by the Lustre FS.
130 +#define EXT4_DIRENT_LUFID 0x10
132 +#define EXT4_LUFID_MAGIC 0xAD200907UL
133 +struct ext4_dentry_param {
134 + __u32 edp_magic; /* EXT4_LUFID_MAGIC */
135 + char edp_len; /* bytes copied into the dirent starting at this field, i.e. edp_len itself plus edp_data */
136 + char edp_data[0]; /* packed array of data */
137 +} __attribute__((packed));
139 +static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
140 + struct ext4_dentry_param* p)
143 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
145 + if (p && p->edp_magic == EXT4_LUFID_MAGIC)
146 + return &p->edp_len;
152 * EXT4_DIR_PAD defines the directory entries boundaries
153 @@ -855,8 +895,11 @@ struct ext4_dir_entry_2 {
155 #define EXT4_DIR_PAD 4
156 #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
157 -#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
158 +#define __EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
160 +#define EXT4_DIR_REC_LEN(de) (__EXT4_DIR_REC_LEN(de->name_len +\
161 + ext4_get_dirent_data_len(de)))
163 #define EXT4_MAX_REC_LEN ((1<<16)-1)
165 static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
166 @@ -1155,7 +1198,7 @@ extern struct buffer_head * ext4_find_en
167 struct ext4_dir_entry_2
169 extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
170 - struct inode *inode);
171 + struct inode *inode, const void *, const void *);
172 extern int ext4_orphan_add(handle_t *, struct inode *);
173 extern int ext4_orphan_del(handle_t *, struct inode *);
174 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
175 @@ -1343,7 +1386,28 @@ static inline int ext4_is_group_locked(s
176 return spin_is_locked(ext4_group_lock_ptr(sb, group));
181 + * Compute the total directory entry data length.
182 + * This includes the filename and an implicit NUL terminator (always present),
183 + * and optional extensions. Each extension has a bit set in the high 4 bits of
184 + * de->file_type, and the extension length is the first byte in each entry.
187 +static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
189 + char *len = de->name + de->name_len + 1 /* NUL terminator */;
191 + __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
193 + while (extra_data_flags) {
194 + if (extra_data_flags & 1) {
195 + dlen += *len + (dlen == 0);
198 + extra_data_flags >>= 1;
203 #endif /* __KERNEL__ */
205 Index: b/fs/ext4/namei.c
206 ===================================================================
207 --- a/fs/ext4/namei.c
208 +++ b/fs/ext4/namei.c
209 @@ -177,7 +177,8 @@ static unsigned dx_get_count (struct dx_
210 static unsigned dx_get_limit (struct dx_entry *entries);
211 static void dx_set_count(struct dx_entry *entries, unsigned value);
212 static void dx_set_limit(struct dx_entry *entries, unsigned value);
213 -static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
214 +static inline unsigned dx_root_limit(__u32 blocksize,
215 + struct ext4_dir_entry_2 *dot_de, unsigned infosize);
216 static unsigned dx_node_limit(struct inode *dir);
217 static struct dx_frame *dx_probe(struct dentry *dentry,
219 @@ -218,11 +219,12 @@ ext4_next_entry(struct ext4_dir_entry_2
221 struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de)
223 - /* get dotdot first */
224 - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
225 + BUG_ON(de->name_len != 1);
226 + /* get dotdot first */
227 + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
229 - /* dx root info is after dotdot entry */
230 - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
231 + /* dx root info is after dotdot entry */
232 + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
234 return (struct dx_root_info *) de;
236 @@ -267,16 +269,23 @@ static inline void dx_set_limit (struct
237 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
240 -static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
241 +static inline unsigned dx_root_limit(__u32 blocksize,
242 + struct ext4_dir_entry_2 *dot_de, unsigned infosize)
244 - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
245 - EXT4_DIR_REC_LEN(2) - infosize;
246 + struct ext4_dir_entry_2 *dotdot_de;
247 + unsigned entry_space;
249 + BUG_ON(dot_de->name_len != 1);
250 + dotdot_de = ext4_next_entry(dot_de);
251 + entry_space = blocksize - EXT4_DIR_REC_LEN(dot_de) -
252 + EXT4_DIR_REC_LEN(dotdot_de) - infosize;
254 return entry_space / sizeof(struct dx_entry);
257 static inline unsigned dx_node_limit(struct inode *dir)
259 - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
260 + unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
261 return entry_space / sizeof(struct dx_entry);
264 @@ -323,7 +332,7 @@ static struct stats dx_show_leaf(struct
265 printk(":%x.%u ", h.hash,
266 ((char *) de - base));
268 - space += EXT4_DIR_REC_LEN(de->name_len);
269 + space += EXT4_DIR_REC_LEN(de);
272 de = ext4_next_entry(de);
273 @@ -427,7 +436,8 @@ dx_probe(struct dentry *dentry, struct i
275 entries = (struct dx_entry *) (((char *)info) + info->info_length);
277 - if (dx_get_limit(entries) != dx_root_limit(dir,
278 + if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize,
279 + (struct ext4_dir_entry_2*)bh->b_data,
280 info->info_length)) {
281 ext4_warning(dir->i_sb, __func__,
282 "dx entry: limit != root limit");
283 @@ -617,7 +627,7 @@ static int htree_dirblock_to_tree(struct
284 de = (struct ext4_dir_entry_2 *) bh->b_data;
285 top = (struct ext4_dir_entry_2 *) ((char *) de +
286 dir->i_sb->s_blocksize -
287 - EXT4_DIR_REC_LEN(0));
288 + __EXT4_DIR_REC_LEN(0));
289 for (; de < top; de = ext4_next_entry(de)) {
290 if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
291 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
292 @@ -1026,7 +1036,7 @@ static struct buffer_head * ext4_dx_find
294 de = (struct ext4_dir_entry_2 *) bh->b_data;
295 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
296 - EXT4_DIR_REC_LEN(0));
297 + __EXT4_DIR_REC_LEN(0));
298 for (; de < top; de = ext4_next_entry(de)) {
299 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
300 + ((char *) de - bh->b_data);
301 @@ -1193,7 +1203,7 @@ dx_move_dirents(char *from, char *to, st
304 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
305 - rec_len = EXT4_DIR_REC_LEN(de->name_len);
306 + rec_len = EXT4_DIR_REC_LEN(de);
307 memcpy (to, de, rec_len);
308 ((struct ext4_dir_entry_2 *) to)->rec_len =
309 ext4_rec_len_to_disk(rec_len);
310 @@ -1217,7 +1227,7 @@ static struct ext4_dir_entry_2* dx_pack_
311 while ((char*)de < base + size) {
312 next = ext4_next_entry(de);
313 if (de->inode && de->name_len) {
314 - rec_len = EXT4_DIR_REC_LEN(de->name_len);
315 + rec_len = EXT4_DIR_REC_LEN(de);
317 memmove(to, de, rec_len);
318 to->rec_len = ext4_rec_len_to_disk(rec_len);
319 @@ -1347,10 +1357,16 @@ static int add_dirent_to_buf(handle_t *h
320 int namelen = dentry->d_name.len;
321 unsigned long offset = 0;
322 unsigned short reclen;
323 - int nlen, rlen, err;
324 + int nlen, rlen, err, dlen = 0;
325 + unsigned char *data;
328 - reclen = EXT4_DIR_REC_LEN(namelen);
329 + data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
332 + dlen = (*data) + 1;
334 + reclen = __EXT4_DIR_REC_LEN(namelen + dlen);
336 de = (struct ext4_dir_entry_2 *)bh->b_data;
337 top = bh->b_data + dir->i_sb->s_blocksize - reclen;
338 @@ -1364,7 +1380,7 @@ static int add_dirent_to_buf(handle_t *h
342 - nlen = EXT4_DIR_REC_LEN(de->name_len);
343 + nlen = EXT4_DIR_REC_LEN(de);
344 rlen = ext4_rec_len_from_disk(de->rec_len);
345 if ((de->inode? rlen - nlen: rlen) >= reclen)
347 @@ -1383,7 +1399,7 @@ static int add_dirent_to_buf(handle_t *h
350 /* By now the buffer is marked for journaling */
351 - nlen = EXT4_DIR_REC_LEN(de->name_len);
352 + nlen = EXT4_DIR_REC_LEN(de);
353 rlen = ext4_rec_len_from_disk(de->rec_len);
355 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
356 @@ -1399,6 +1415,12 @@ static int add_dirent_to_buf(handle_t *h
358 de->name_len = namelen;
359 memcpy(de->name, name, namelen);
361 + de->name[namelen] = 0;
362 + memcpy(&de->name[namelen + 1], data, *(char *) data);
363 + de->file_type |= EXT4_DIRENT_LUFID;
367 * XXX shouldn't update any times until successful
368 * completion of syscall, but too many callers depend
369 @@ -1488,7 +1510,8 @@ static int make_indexed_dir(handle_t *ha
371 dx_set_block(entries, 1);
372 dx_set_count(entries, 1);
373 - dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
374 + dx_set_limit(entries, dx_root_limit(dir->i_sb->s_blocksize,
375 + dot_de, sizeof(*dx_info)));
377 /* Initialize as for dx_probe */
378 hinfo.hash_version = dx_info->hash_version;
379 @@ -1516,6 +1539,8 @@ static int ext4_update_dotdot(handle_t *
380 struct buffer_head * dir_block;
381 struct ext4_dir_entry_2 * de;
382 int len, journal = 0, err = 0;
387 return PTR_ERR(handle);
388 @@ -1531,19 +1556,24 @@ static int ext4_update_dotdot(handle_t *
389 /* the first item must be "." */
390 assert(de->name_len == 1 && de->name[0] == '.');
391 len = le16_to_cpu(de->rec_len);
392 - assert(len >= EXT4_DIR_REC_LEN(1));
393 - if (len > EXT4_DIR_REC_LEN(1)) {
394 + assert(len >= __EXT4_DIR_REC_LEN(1));
395 + if (len > __EXT4_DIR_REC_LEN(1)) {
396 BUFFER_TRACE(dir_block, "get_write_access");
397 err = ext4_journal_get_write_access(handle, dir_block);
402 - de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
403 + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
406 - len -= EXT4_DIR_REC_LEN(1);
407 - assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
408 + len -= EXT4_DIR_REC_LEN(de);
409 + data = ext4_dentry_get_data(dir->i_sb,
410 + (struct ext4_dentry_param *) dentry->d_fsdata);
413 + assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
415 de = (struct ext4_dir_entry_2 *)
416 ((char *) de + le16_to_cpu(de->rec_len));
418 @@ -1557,10 +1587,15 @@ static int ext4_update_dotdot(handle_t *
420 de->rec_len = cpu_to_le16(len);
422 - assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
423 + assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
425 strcpy (de->name, "..");
426 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
429 + memcpy(&de->name[2 + 1], data, dlen);
430 + de->file_type |= EXT4_DIRENT_LUFID;
435 @@ -1972,11 +2007,12 @@ retry:
436 /* Initialize @inode as a subdirectory of @dir, and add the
437 * "." and ".." entries into the first directory block. */
438 int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir,
439 - struct inode *inode)
440 + struct inode *inode,
441 + const void *data1, const void *data2)
443 struct buffer_head * dir_block;
444 struct ext4_dir_entry_2 * de;
446 + int err = 0, dot_reclen;
449 return PTR_ERR(handle);
450 @@ -1993,16 +2029,34 @@ int ext4_add_dot_dotdot(handle_t *handle
451 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
452 de->inode = cpu_to_le32(inode->i_ino);
454 - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
455 strcpy(de->name, ".");
456 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
457 + /* get packed fid data*/
458 + data1 = ext4_dentry_get_data(dir->i_sb,
459 + (struct ext4_dentry_param *) data1);
462 + memcpy(&de->name[2], data1, *(char *) data1);
463 + de->file_type |= EXT4_DIRENT_LUFID;
465 + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
466 + dot_reclen = cpu_to_le16(de->rec_len);
468 de = ext4_next_entry(de);
469 de->inode = cpu_to_le32(dir->i_ino);
470 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
471 - EXT4_DIR_REC_LEN(1));
474 strcpy(de->name, "..");
475 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
476 + data2 = ext4_dentry_get_data(dir->i_sb,
477 + (struct ext4_dentry_param *) data2);
480 + memcpy(&de->name[3], data2, *(char *) data2);
481 + de->file_type |= EXT4_DIRENT_LUFID;
485 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
486 ext4_journal_dirty_metadata(handle, dir_block);
487 @@ -2037,7 +2091,7 @@ retry:
491 - err = ext4_add_dot_dotdot(handle, dir, inode);
492 + err = ext4_add_dot_dotdot(handle, dir, inode, NULL, NULL);
496 @@ -2071,7 +2125,7 @@ static int empty_dir (struct inode * ino
500 - if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
501 + if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2) ||
502 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
504 ext4_error(inode->i_sb, __func__,