2 fs/ext4/dir.c | 26 +++++++++---
3 fs/ext4/ext4.h | 70 ++++++++++++++++++++++++++++++++-
4 fs/ext4/namei.c | 117 ++++++++++++++++++++++++++++++++++++++++----------------
5 3 files changed, 170 insertions(+), 43 deletions(-)
9 @@ -53,11 +53,18 @@ const struct file_operations ext4_dir_op
11 static unsigned char get_dtype(struct super_block *sb, int filetype)
13 + int fl_index = filetype & EXT4_FT_MASK;
15 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
16 - (filetype >= EXT4_FT_MAX))
17 + (fl_index >= EXT4_FT_MAX))
20 - return (ext4_filetype_table[filetype]);
21 + if (!test_opt(sb, DIRDATA))
22 + return (ext4_filetype_table[fl_index]);
24 + return (ext4_filetype_table[fl_index]) |
25 + (filetype & EXT4_DIRENT_LUFID);
30 @@ -75,11 +82,11 @@ int __ext4_check_dir_entry(const char *f
31 const int rlen = ext4_rec_len_from_disk(de->rec_len,
32 dir->i_sb->s_blocksize);
34 - if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
35 + if (unlikely(rlen < __EXT4_DIR_REC_LEN(1)))
36 error_msg = "rec_len is smaller than minimal";
37 else if (unlikely(rlen % 4 != 0))
38 error_msg = "rec_len % 4 != 0";
39 - else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
40 + else if (unlikely(rlen < EXT4_DIR_REC_LEN(de)))
41 error_msg = "rec_len is too small for name_len";
42 else if (unlikely(((char *) de - bh->b_data) + rlen >
43 dir->i_sb->s_blocksize))
44 @@ -196,7 +203,7 @@ revalidate:
45 * failure will be detected in the
46 * dirent test below. */
47 if (ext4_rec_len_from_disk(de->rec_len,
48 - sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
49 + sb->s_blocksize) < __EXT4_DIR_REC_LEN(1))
51 i += ext4_rec_len_from_disk(de->rec_len,
53 @@ -359,12 +366,17 @@ int ext4_htree_store_dirent(struct file
54 struct fname *fname, *new_fn;
55 struct dir_private_info *info;
59 info = dir_file->private_data;
60 p = &info->root.rb_node;
62 /* Create and allocate the fname structure */
63 - len = sizeof(struct fname) + dirent->name_len + 1;
64 + if (dirent->file_type & EXT4_DIRENT_LUFID)
65 + extra_data = ext4_get_dirent_data_len(dirent);
67 + len = sizeof(struct fname) + dirent->name_len + extra_data;
69 new_fn = kzalloc(len, GFP_KERNEL);
72 @@ -373,7 +385,7 @@ int ext4_htree_store_dirent(struct file
73 new_fn->inode = le32_to_cpu(dirent->inode);
74 new_fn->name_len = dirent->name_len;
75 new_fn->file_type = dirent->file_type;
76 - memcpy(new_fn->name, dirent->name, dirent->name_len);
77 + memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data);
78 new_fn->name[dirent->name_len] = 0;
83 @@ -902,6 +902,7 @@ struct ext4_inode_info {
84 #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
85 #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
86 #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
87 +#define EXT4_MOUNT_DIRDATA 0x00200 /* Data in directory entries */
88 #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
89 #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
90 #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
91 @@ -1414,7 +1414,9 @@ static inline void ext4_clear_state_flag
92 EXT4_FEATURE_INCOMPAT_EXTENTS| \
93 EXT4_FEATURE_INCOMPAT_64BIT| \
94 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
95 - EXT4_FEATURE_INCOMPAT_MMP)
96 + EXT4_FEATURE_INCOMPAT_MMP| \
97 + EXT4_FEATURE_INCOMPAT_DIRDATA)
99 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
100 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
101 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
102 @@ -1501,6 +1503,43 @@ struct ext4_dir_entry_2 {
103 #define EXT4_FT_SYMLINK 7
105 #define EXT4_FT_MAX 8
106 +#define EXT4_FT_MASK 0xf
108 +#if EXT4_FT_MAX > EXT4_FT_MASK
109 +#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
113 + * d_type has 4 unused bits, so it can hold four types of data. These different
114 + * types of data (e.g. Lustre data, high 32 bits of a 64-bit inode number) can be
115 + * stored, in flag order, after the file name in the ext4 dirent.
118 + * This flag is added to d_type if the ext4 dirent has extra data after the
119 + * filename. The data length is variable and is stored in the first byte
120 + * of the data. The data starts after the filename's NUL byte.
121 + * This is used by the Lustre FS.
123 +#define EXT4_DIRENT_LUFID 0x10
125 +#define EXT4_LUFID_MAGIC 0xAD200907UL
126 +struct ext4_dentry_param {
127 + __u32 edp_magic; /* EXT4_LUFID_MAGIC */
128 + char edp_len; /* size of edp_data in bytes */
129 + char edp_data[0]; /* packed array of data */
130 +} __attribute__((packed));
132 +static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
133 + struct ext4_dentry_param* p)
136 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
138 + if (p && p->edp_magic == EXT4_LUFID_MAGIC)
139 + return &p->edp_len;
145 * EXT4_DIR_PAD defines the directory entries boundaries
146 @@ -1509,8 +1548,11 @@ struct ext4_dir_entry_2 {
148 #define EXT4_DIR_PAD 4
149 #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
150 -#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
151 +#define __EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
153 +#define EXT4_DIR_REC_LEN(de) (__EXT4_DIR_REC_LEN(de->name_len +\
154 + ext4_get_dirent_data_len(de)))
156 #define EXT4_MAX_REC_LEN ((1<<16)-1)
159 @@ -1908,7 +1950,7 @@ extern struct buffer_head * ext4_find_en
160 struct ext4_dir_entry_2 ** res_dir);
161 #define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
162 extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
163 - struct inode *inode);
164 + struct inode *inode, const void *, const void *);
165 extern struct buffer_head *ext4_append(handle_t *handle,
167 ext4_lblk_t *block, int *err);
168 @@ -2308,6 +2350,28 @@ static inline void set_bitmap_uptodate(s
169 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
170 extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
173 + * Compute the total directory entry data length.
174 + * This includes the filename and an implicit NUL terminator (always present),
175 + * and optional extensions. Each extension has a bit set in the high 4 bits of
176 + * de->file_type, and the extension length is the first byte in each entry.
178 +static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
180 + char *len = de->name + de->name_len + 1 /* NUL terminator */;
182 + __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
184 + while (extra_data_flags) {
185 + if (extra_data_flags & 1) {
186 + dlen += *len + (dlen == 0);
189 + extra_data_flags >>= 1;
194 #endif /* __KERNEL__ */
197 --- a/fs/ext4/namei.c
198 +++ b/fs/ext4/namei.c
199 @@ -170,7 +170,8 @@ static unsigned dx_get_count(struct dx_e
200 static unsigned dx_get_limit(struct dx_entry *entries);
201 static void dx_set_count(struct dx_entry *entries, unsigned value);
202 static void dx_set_limit(struct dx_entry *entries, unsigned value);
203 -static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
204 +static inline unsigned dx_root_limit(__u32 blocksize,
205 + struct ext4_dir_entry_2 *dot_de, unsigned infosize);
206 static unsigned dx_node_limit(struct inode *dir);
207 static struct dx_frame *dx_probe(const struct qstr *d_name,
209 @@ -213,11 +214,12 @@ ext4_next_entry(struct ext4_dir_entry_2
211 struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de)
213 - /* get dotdot first */
214 - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
215 + BUG_ON(de->name_len != 1);
216 + /* get dotdot first */
217 + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
219 - /* dx root info is after dotdot entry */
220 - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
221 + /* dx root info is after dotdot entry */
222 + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
224 return (struct dx_root_info *) de;
226 @@ -262,16 +264,23 @@ static inline void dx_set_limit(struct d
227 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
230 -static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
231 +static inline unsigned dx_root_limit(__u32 blocksize,
232 + struct ext4_dir_entry_2 *dot_de, unsigned infosize)
234 - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
235 - EXT4_DIR_REC_LEN(2) - infosize;
236 + struct ext4_dir_entry_2 *dotdot_de;
237 + unsigned entry_space;
239 + BUG_ON(dot_de->name_len != 1);
240 + dotdot_de = ext4_next_entry(dot_de, blocksize);
241 + entry_space = blocksize - EXT4_DIR_REC_LEN(dot_de) -
242 + EXT4_DIR_REC_LEN(dotdot_de) - infosize;
244 return entry_space / sizeof(struct dx_entry);
247 static inline unsigned dx_node_limit(struct inode *dir)
249 - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
250 + unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
251 return entry_space / sizeof(struct dx_entry);
254 @@ -318,7 +327,7 @@ static struct stats dx_show_leaf(struct
255 printk(":%x.%u ", h.hash,
256 ((char *) de - base));
258 - space += EXT4_DIR_REC_LEN(de->name_len);
259 + space += EXT4_DIR_REC_LEN(de);
262 de = ext4_next_entry(de, size);
263 @@ -420,7 +429,8 @@ dx_probe(const struct qstr *d_name, stru
265 entries = (struct dx_entry *) (((char *)info) + info->info_length);
267 - if (dx_get_limit(entries) != dx_root_limit(dir,
268 + if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize,
269 + (struct ext4_dir_entry_2*)bh->b_data,
270 info->info_length)) {
271 ext4_warning(dir->i_sb, "dx entry: limit != root limit");
273 @@ -609,7 +619,7 @@ static int htree_dirblock_to_tree(struct
274 de = (struct ext4_dir_entry_2 *) bh->b_data;
275 top = (struct ext4_dir_entry_2 *) ((char *) de +
276 dir->i_sb->s_blocksize -
277 - EXT4_DIR_REC_LEN(0));
278 + __EXT4_DIR_REC_LEN(0));
279 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
280 if (ext4_check_dir_entry(dir, NULL, de, bh,
281 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
282 @@ -1172,7 +1182,7 @@ dx_move_dirents(char *from, char *to, st
284 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
285 (from + (map->offs<<2));
286 - rec_len = EXT4_DIR_REC_LEN(de->name_len);
287 + rec_len = EXT4_DIR_REC_LEN(de);
288 memcpy (to, de, rec_len);
289 ((struct ext4_dir_entry_2 *) to)->rec_len =
290 ext4_rec_len_to_disk(rec_len, blocksize);
291 @@ -1196,7 +1206,7 @@ static struct ext4_dir_entry_2* dx_pack_
292 while ((char*)de < base + blocksize) {
293 next = ext4_next_entry(de, blocksize);
294 if (de->inode && de->name_len) {
295 - rec_len = EXT4_DIR_REC_LEN(de->name_len);
296 + rec_len = EXT4_DIR_REC_LEN(de);
298 memmove(to, de, rec_len);
299 to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
300 @@ -1326,10 +1336,16 @@ static int add_dirent_to_buf(handle_t *h
301 unsigned int offset = 0;
302 unsigned int blocksize = dir->i_sb->s_blocksize;
303 unsigned short reclen;
304 - int nlen, rlen, err;
305 + int nlen, rlen, err, dlen = 0;
306 + unsigned char *data;
309 - reclen = EXT4_DIR_REC_LEN(namelen);
310 + data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
313 + dlen = (*data) + 1;
315 + reclen = __EXT4_DIR_REC_LEN(namelen + dlen);
317 de = (struct ext4_dir_entry_2 *)bh->b_data;
318 top = bh->b_data + blocksize - reclen;
319 @@ -1338,7 +1354,7 @@ static int add_dirent_to_buf(handle_t *h
321 if (ext4_match(namelen, name, de))
323 - nlen = EXT4_DIR_REC_LEN(de->name_len);
324 + nlen = EXT4_DIR_REC_LEN(de);
325 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
326 if ((de->inode? rlen - nlen: rlen) >= reclen)
328 @@ -1356,7 +1372,7 @@ static int add_dirent_to_buf(handle_t *h
331 /* By now the buffer is marked for journaling */
332 - nlen = EXT4_DIR_REC_LEN(de->name_len);
333 + nlen = EXT4_DIR_REC_LEN(de);
334 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
336 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
337 @@ -1372,6 +1388,12 @@ static int add_dirent_to_buf(handle_t *h
339 de->name_len = namelen;
340 memcpy(de->name, name, namelen);
342 + de->name[namelen] = 0;
343 + memcpy(&de->name[namelen + 1], data, *(char *) data);
344 + de->file_type |= EXT4_DIRENT_LUFID;
348 * XXX shouldn't update any times until successful
349 * completion of syscall, but too many callers depend
350 @@ -1468,7 +1490,8 @@ static int make_indexed_dir(handle_t *ha
352 dx_set_block(entries, 1);
353 dx_set_count(entries, 1);
354 - dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
355 + dx_set_limit(entries, dx_root_limit(dir->i_sb->s_blocksize,
356 + dot_de, sizeof(*dx_info)));
358 /* Initialize as for dx_probe */
359 hinfo.hash_version = dx_info->hash_version;
360 @@ -1511,6 +1534,8 @@ static int ext4_update_dotdot(handle_t *
361 struct buffer_head * dir_block;
362 struct ext4_dir_entry_2 * de;
363 int len, journal = 0, err = 0;
368 return PTR_ERR(handle);
369 @@ -1526,19 +1551,24 @@ static int ext4_update_dotdot(handle_t *
370 /* the first item must be "." */
371 assert(de->name_len == 1 && de->name[0] == '.');
372 len = le16_to_cpu(de->rec_len);
373 - assert(len >= EXT4_DIR_REC_LEN(1));
374 - if (len > EXT4_DIR_REC_LEN(1)) {
375 + assert(len >= __EXT4_DIR_REC_LEN(1));
376 + if (len > __EXT4_DIR_REC_LEN(1)) {
377 BUFFER_TRACE(dir_block, "get_write_access");
378 err = ext4_journal_get_write_access(handle, dir_block);
383 - de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
384 + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
387 - len -= EXT4_DIR_REC_LEN(1);
388 - assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
389 + len -= EXT4_DIR_REC_LEN(de);
390 + data = ext4_dentry_get_data(dir->i_sb,
391 + (struct ext4_dentry_param *) dentry->d_fsdata);
394 + assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
396 de = (struct ext4_dir_entry_2 *)
397 ((char *) de + le16_to_cpu(de->rec_len));
399 @@ -1552,10 +1582,15 @@ static int ext4_update_dotdot(handle_t *
401 de->rec_len = cpu_to_le16(len);
403 - assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
404 + assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
406 strcpy (de->name, "..");
407 - ext4_set_de_type(dir->i_sb, de, S_IFDIR);
408 + if (data != NULL && ext4_get_dirent_data_len(de) >= dlen) {
410 + memcpy(&de->name[2 + 1], data, *data);
411 + ext4_set_de_type(dir->i_sb, de, S_IFDIR);
412 + de->file_type |= EXT4_DIRENT_LUFID;
417 @@ -1994,12 +2029,13 @@ retry:
418 /* Initialize @inode as a subdirectory of @dir, and add the
419 * "." and ".." entries into the first directory block. */
420 int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir,
421 - struct inode *inode)
422 + struct inode *inode,
423 + const void *data1, const void *data2)
425 struct buffer_head *dir_block;
426 struct ext4_dir_entry_2 *de;
427 unsigned int blocksize = dir->i_sb->s_blocksize;
429 + int err = 0, dot_reclen;
432 return PTR_ERR(handle);
433 @@ -2020,17 +2056,32 @@ int ext4_add_dot_dotdot(handle_t *handle
434 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
435 de->inode = cpu_to_le32(inode->i_ino);
437 - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
439 strcpy(de->name, ".");
440 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
441 + /* get packed fid data */
442 + data1 = ext4_dentry_get_data(dir->i_sb,
443 + (struct ext4_dentry_param *) data1);
446 + memcpy(&de->name[2], data1, *(char *) data1);
447 + de->file_type |= EXT4_DIRENT_LUFID;
449 + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
450 + dot_reclen = cpu_to_le16(de->rec_len);
451 de = ext4_next_entry(de, blocksize);
452 de->inode = cpu_to_le32(dir->i_ino);
453 - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
454 + de->rec_len = ext4_rec_len_to_disk(blocksize - dot_reclen,
457 strcpy(de->name, "..");
458 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
459 + data2 = ext4_dentry_get_data(dir->i_sb,
460 + (struct ext4_dentry_param *) data2);
463 + memcpy(&de->name[3], data2, *(char *) data2);
464 + de->file_type |= EXT4_DIRENT_LUFID;
467 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
468 err = ext4_handle_dirty_metadata(handle, inode, dir_block);
469 @@ -2070,7 +2121,7 @@ retry:
473 - err = ext4_add_dot_dotdot(handle, dir, inode);
474 + err = ext4_add_dot_dotdot(handle, dir, inode, NULL, NULL);
476 goto out_clear_inode;
477 err = ext4_add_entry(handle, dentry, inode);
478 @@ -2108,7 +2159,7 @@ static int empty_dir(struct inode *inode
482 - if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
483 + if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2) ||
484 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
486 EXT4_ERROR_INODE(inode,
487 --- a/fs/ext4/super.c
488 +++ b/fs/ext4/super.c
489 @@ -1363,7 +1363,7 @@ enum {
490 Opt_data_err_abort, Opt_data_err_ignore,
491 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
492 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
493 - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
494 + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_dirdata,
495 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
496 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
497 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
498 @@ -1427,6 +1427,7 @@ static const match_table_t tokens = {
499 {Opt_iopen, "iopen"},
500 {Opt_noiopen, "noiopen"},
501 {Opt_iopen_nopriv, "iopen_nopriv"},
502 + {Opt_dirdata, "dirdata"},
503 {Opt_barrier, "barrier=%u"},
504 {Opt_barrier, "barrier"},
505 {Opt_nobarrier, "nobarrier"},
506 @@ -1840,6 +1841,9 @@ set_qf_format:
508 case Opt_iopen_nopriv:
511 + set_opt(sb, DIRDATA);