1 This patch implements a feature that allows ext4 fs users (e.g. Lustre)
2 to store extra data in an ext4 dirent.
3 The data is stored in the ext4 dirent after the file name; this space is
4 accounted for in de->rec_len. The flag EXT4_DIRENT_LUFID is added to d_type if extra data is present.
7 The patch makes use of dentry->d_fsdata to pass the FID to ext4, so no
8 changes to the ext4_add_entry() interface are required.
12 @@ -37,11 +37,18 @@ static int ext4_dx_readdir(struct file *
14 static unsigned char get_dtype(struct super_block *sb, int filetype)
16 + int fl_index = filetype & EXT4_FT_MASK;
18 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
19 - (filetype >= EXT4_FT_MAX))
20 + (fl_index >= EXT4_FT_MAX))
23 - return (ext4_filetype_table[filetype]);
24 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
25 + return (ext4_filetype_table[fl_index]);
27 + return (ext4_filetype_table[fl_index]) |
28 + (filetype & EXT4_DIRENT_LUFID);
33 @@ -73,11 +80,11 @@ int ext4_check_dir_entry(const char *fun
34 const int rlen = ext4_rec_len_from_disk(de->rec_len,
35 dir->i_sb->s_blocksize);
37 - if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
38 + if (unlikely(rlen < __EXT4_DIR_REC_LEN(1)))
39 error_msg = "rec_len is smaller than minimal";
40 else if (unlikely(rlen % 4 != 0))
41 error_msg = "rec_len % 4 != 0";
42 - else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
43 + else if (unlikely(rlen < EXT4_DIR_REC_LEN(de)))
44 error_msg = "rec_len is too small for name_len";
45 else if (unlikely(((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
46 error_msg = "directory entry across blocks";
47 @@ -181,7 +188,7 @@ revalidate:
48 * failure will be detected in the
49 * dirent test below. */
50 if (ext4_rec_len_from_disk(de->rec_len,
51 - sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
52 + sb->s_blocksize) < __EXT4_DIR_REC_LEN(1))
54 i += ext4_rec_len_from_disk(de->rec_len,
56 @@ -457,12 +464,17 @@ int ext4_htree_store_dirent(struct file
57 struct fname *fname, *new_fn;
58 struct dir_private_info *info;
62 info = (struct dir_private_info *) dir_file->private_data;
63 p = &info->root.rb_node;
65 /* Create and allocate the fname structure */
66 - len = sizeof(struct fname) + dirent->name_len + 1;
67 + if (dirent->file_type & EXT4_DIRENT_LUFID)
68 + extra_data = ext4_get_dirent_data_len(dirent);
70 + len = sizeof(struct fname) + dirent->name_len + extra_data;
72 new_fn = kzalloc(len, GFP_KERNEL);
75 @@ -471,7 +483,7 @@ int ext4_htree_store_dirent(struct file
76 new_fn->inode = le32_to_cpu(dirent->inode);
77 new_fn->name_len = dirent->name_len;
78 new_fn->file_type = dirent->file_type;
79 - memcpy(new_fn->name, dirent->name, dirent->name_len);
80 + memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data);
81 new_fn->name[dirent->name_len] = 0;
86 @@ -1294,6 +1294,7 @@ EXT4_INODE_BIT_FNS(state, state_flags)
87 #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
88 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100
89 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
90 +#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000
92 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
93 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
94 @@ -1302,7 +1303,9 @@ EXT4_INODE_BIT_FNS(state, state_flags)
95 EXT4_FEATURE_INCOMPAT_EXTENTS| \
96 EXT4_FEATURE_INCOMPAT_64BIT| \
97 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
98 - EXT4_FEATURE_INCOMPAT_MMP)
99 + EXT4_FEATURE_INCOMPAT_MMP| \
100 + EXT4_FEATURE_INCOMPAT_DIRDATA)
102 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
103 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
104 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
105 @@ -1388,6 +1391,43 @@ struct ext4_dir_entry_2 {
106 #define EXT4_FT_SYMLINK 7
108 #define EXT4_FT_MAX 8
109 +#define EXT4_FT_MASK 0xf
111 +#if EXT4_FT_MAX > EXT4_FT_MASK
112 +#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
116 + * d_type has 4 unused bits, so it can flag four types of extra data. These
117 + * different types of data (e.g. Lustre data, high 32 bits of a 64-bit inode
118 + * number) can be stored, in flag order, after the file name in the ext4 dirent.
121 + * This flag is added to d_type if the ext4 dirent has extra data after the
122 + * file name. The data length is variable and is stored in the first byte
123 + * of the data. The data starts after the file name's NUL byte.
124 + * This is used by the Lustre FS.
126 +#define EXT4_DIRENT_LUFID 0x10
128 +#define EXT4_LUFID_MAGIC 0xAD200907UL
129 +struct ext4_dentry_param {
130 + __u32 edp_magic; /* EXT4_LUFID_MAGIC */
131 + char edp_len; /* size of edp_data in bytes */
132 + char edp_data[0]; /* packed array of data */
133 +} __attribute__((packed));
135 +static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
136 + struct ext4_dentry_param* p)
139 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
141 + if (p && p->edp_magic == EXT4_LUFID_MAGIC)
142 + return &p->edp_len;
148 * EXT4_DIR_PAD defines the directory entries boundaries
149 @@ -1396,8 +1436,11 @@ struct ext4_dir_entry_2 {
151 #define EXT4_DIR_PAD 4
152 #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
153 -#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
154 +#define __EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
156 +#define EXT4_DIR_REC_LEN(de) (__EXT4_DIR_REC_LEN(de->name_len +\
157 + ext4_get_dirent_data_len(de)))
159 #define EXT4_MAX_REC_LEN ((1<<16)-1)
161 static inline unsigned int
162 @@ -1791,7 +1834,7 @@ extern struct buffer_head * ext4_find_en
163 struct ext4_dir_entry_2 ** res_dir);
164 #define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
165 extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
166 - struct inode *inode);
167 + struct inode *inode, const void *, const void *);
168 extern struct buffer_head *ext4_append(handle_t *handle,
170 ext4_lblk_t *block, int *err);
171 @@ -2143,6 +2186,28 @@ extern wait_queue_head_t aio_wq[];
172 #define to_aio_wq(v) (&aio_wq[((unsigned long)v) % WQ_HASH_SZ])
173 extern void ext4_aio_wait(struct inode *inode);
176 + * Compute the total directory entry data length.
177 + * This includes the filename and an implicit NUL terminator (always present),
178 + * and optional extensions. Each extension has a bit set in the high 4 bits of
179 + * de->file_type, and the extension length is the first byte in each entry.
181 +static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
183 + char *len = de->name + de->name_len + 1 /* NUL terminator */;
185 + __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
187 + while (extra_data_flags) {
188 + if (extra_data_flags & 1) {
189 + dlen += *len + (dlen == 0);
192 + extra_data_flags >>= 1;
197 #endif /* __KERNEL__ */
200 --- a/fs/ext4/namei.c
201 +++ b/fs/ext4/namei.c
202 @@ -169,7 +169,8 @@ static unsigned dx_get_count(struct dx_e
203 static unsigned dx_get_limit(struct dx_entry *entries);
204 static void dx_set_count(struct dx_entry *entries, unsigned value);
205 static void dx_set_limit(struct dx_entry *entries, unsigned value);
206 -static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
207 +static inline unsigned dx_root_limit(__u32 blocksize,
208 + struct ext4_dir_entry_2 *dot_de, unsigned infosize);
209 static unsigned dx_node_limit(struct inode *dir);
210 static struct dx_frame *dx_probe(const struct qstr *d_name,
212 @@ -212,11 +213,12 @@ ext4_next_entry(struct ext4_dir_entry_2
214 struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de)
216 - /* get dotdot first */
217 - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
218 + BUG_ON(de->name_len != 1);
219 + /* get dotdot first */
220 + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
222 - /* dx root info is after dotdot entry */
223 - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
224 + /* dx root info is after dotdot entry */
225 + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
227 return (struct dx_root_info *) de;
229 @@ -261,16 +263,23 @@ static inline void dx_set_limit(struct d
230 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
233 -static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
234 +static inline unsigned dx_root_limit(__u32 blocksize,
235 + struct ext4_dir_entry_2 *dot_de, unsigned infosize)
237 - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
238 - EXT4_DIR_REC_LEN(2) - infosize;
239 + struct ext4_dir_entry_2 *dotdot_de;
240 + unsigned entry_space;
242 + BUG_ON(dot_de->name_len != 1);
243 + dotdot_de = ext4_next_entry(dot_de, blocksize);
244 + entry_space = blocksize - EXT4_DIR_REC_LEN(dot_de) -
245 + EXT4_DIR_REC_LEN(dotdot_de) - infosize;
247 return entry_space / sizeof(struct dx_entry);
250 static inline unsigned dx_node_limit(struct inode *dir)
252 - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
253 + unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
254 return entry_space / sizeof(struct dx_entry);
257 @@ -317,7 +326,7 @@ static struct stats dx_show_leaf(struct
258 printk(":%x.%u ", h.hash,
259 ((char *) de - base));
261 - space += EXT4_DIR_REC_LEN(de->name_len);
262 + space += EXT4_DIR_REC_LEN(de);
265 de = ext4_next_entry(de, size);
266 @@ -419,7 +428,8 @@ dx_probe(const struct qstr *d_name, stru
268 entries = (struct dx_entry *) (((char *)info) + info->info_length);
270 - if (dx_get_limit(entries) != dx_root_limit(dir,
271 + if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize,
272 + (struct ext4_dir_entry_2*)bh->b_data,
273 info->info_length)) {
274 ext4_warning(dir->i_sb, "dx entry: limit != root limit");
276 @@ -608,7 +618,7 @@ static int htree_dirblock_to_tree(struct
277 de = (struct ext4_dir_entry_2 *) bh->b_data;
278 top = (struct ext4_dir_entry_2 *) ((char *) de +
279 dir->i_sb->s_blocksize -
280 - EXT4_DIR_REC_LEN(0));
281 + __EXT4_DIR_REC_LEN(0));
282 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
283 if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
284 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
285 @@ -1020,7 +1030,7 @@ static struct buffer_head * ext4_dx_find
287 de = (struct ext4_dir_entry_2 *) bh->b_data;
288 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
289 - EXT4_DIR_REC_LEN(0));
290 + __EXT4_DIR_REC_LEN(0));
291 for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
292 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
293 + ((char *) de - bh->b_data);
294 @@ -1181,7 +1191,7 @@ dx_move_dirents(char *from, char *to, st
296 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
297 (from + (map->offs<<2));
298 - rec_len = EXT4_DIR_REC_LEN(de->name_len);
299 + rec_len = EXT4_DIR_REC_LEN(de);
300 memcpy (to, de, rec_len);
301 ((struct ext4_dir_entry_2 *) to)->rec_len =
302 ext4_rec_len_to_disk(rec_len, blocksize);
303 @@ -1205,7 +1215,7 @@ static struct ext4_dir_entry_2* dx_pack_
304 while ((char*)de < base + blocksize) {
305 next = ext4_next_entry(de, blocksize);
306 if (de->inode && de->name_len) {
307 - rec_len = EXT4_DIR_REC_LEN(de->name_len);
308 + rec_len = EXT4_DIR_REC_LEN(de);
310 memmove(to, de, rec_len);
311 to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
312 @@ -1335,10 +1345,16 @@ static int add_dirent_to_buf(handle_t *h
313 unsigned int offset = 0;
314 unsigned int blocksize = dir->i_sb->s_blocksize;
315 unsigned short reclen;
316 - int nlen, rlen, err;
317 + int nlen, rlen, err, dlen = 0;
318 + unsigned char *data;
321 - reclen = EXT4_DIR_REC_LEN(namelen);
322 + data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
325 + dlen = (*data) + 1;
327 + reclen = __EXT4_DIR_REC_LEN(namelen + dlen);
329 de = (struct ext4_dir_entry_2 *)bh->b_data;
330 top = bh->b_data + blocksize - reclen;
331 @@ -1348,7 +1364,7 @@ static int add_dirent_to_buf(handle_t *h
333 if (ext4_match(namelen, name, de))
335 - nlen = EXT4_DIR_REC_LEN(de->name_len);
336 + nlen = EXT4_DIR_REC_LEN(de);
337 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
338 if ((de->inode? rlen - nlen: rlen) >= reclen)
340 @@ -1366,7 +1382,7 @@ static int add_dirent_to_buf(handle_t *h
343 /* By now the buffer is marked for journaling */
344 - nlen = EXT4_DIR_REC_LEN(de->name_len);
345 + nlen = EXT4_DIR_REC_LEN(de);
346 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
348 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
349 @@ -1382,6 +1398,12 @@ static int add_dirent_to_buf(handle_t *h
351 de->name_len = namelen;
352 memcpy(de->name, name, namelen);
354 + de->name[namelen] = 0;
355 + memcpy(&de->name[namelen + 1], data, *(char *) data);
356 + de->file_type |= EXT4_DIRENT_LUFID;
360 * XXX shouldn't update any times until successful
361 * completion of syscall, but too many callers depend
362 @@ -1480,7 +1502,8 @@ static int make_indexed_dir(handle_t *ha
364 dx_set_block(entries, 1);
365 dx_set_count(entries, 1);
366 - dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
367 + dx_set_limit(entries, dx_root_limit(dir->i_sb->s_blocksize,
368 + dot_de, sizeof(*dx_info)));
370 /* Initialize as for dx_probe */
371 hinfo.hash_version = dx_info->hash_version;
372 @@ -1523,6 +1546,8 @@ static int ext4_update_dotdot(handle_t *
373 struct buffer_head * dir_block;
374 struct ext4_dir_entry_2 * de;
375 int len, journal = 0, err = 0;
380 return PTR_ERR(handle);
381 @@ -1538,19 +1563,24 @@ static int ext4_update_dotdot(handle_t *
382 /* the first item must be "." */
383 assert(de->name_len == 1 && de->name[0] == '.');
384 len = le16_to_cpu(de->rec_len);
385 - assert(len >= EXT4_DIR_REC_LEN(1));
386 - if (len > EXT4_DIR_REC_LEN(1)) {
387 + assert(len >= __EXT4_DIR_REC_LEN(1));
388 + if (len > __EXT4_DIR_REC_LEN(1)) {
389 BUFFER_TRACE(dir_block, "get_write_access");
390 err = ext4_journal_get_write_access(handle, dir_block);
395 - de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
396 + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
399 - len -= EXT4_DIR_REC_LEN(1);
400 - assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
401 + len -= EXT4_DIR_REC_LEN(de);
402 + data = ext4_dentry_get_data(dir->i_sb,
403 + (struct ext4_dentry_param *) dentry->d_fsdata);
406 + assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
408 de = (struct ext4_dir_entry_2 *)
409 ((char *) de + le16_to_cpu(de->rec_len));
411 @@ -1564,10 +1594,15 @@ static int ext4_update_dotdot(handle_t *
413 de->rec_len = cpu_to_le16(len);
415 - assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
416 + assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
418 strcpy (de->name, "..");
419 - ext4_set_de_type(dir->i_sb, de, S_IFDIR);
420 + if (data != NULL && ext4_get_dirent_data_len(de) >= dlen) {
422 + memcpy(&de->name[2 + 1], data, *data);
423 + ext4_set_de_type(dir->i_sb, de, S_IFDIR);
424 + de->file_type |= EXT4_DIRENT_LUFID;
429 @@ -1989,12 +2024,13 @@ retry:
430 /* Initialize @inode as a subdirectory of @dir, and add the
431 * "." and ".." entries into the first directory block. */
432 int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir,
433 - struct inode *inode)
434 + struct inode *inode,
435 + const void *data1, const void *data2)
437 struct buffer_head * dir_block;
438 struct ext4_dir_entry_2 * de;
439 unsigned int blocksize = dir->i_sb->s_blocksize;
441 + int err = 0, dot_reclen;
444 return PTR_ERR(handle);
445 @@ -2015,17 +2051,32 @@ int ext4_add_dot_dotdot(handle_t *handle
446 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
447 de->inode = cpu_to_le32(inode->i_ino);
449 - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
451 strcpy(de->name, ".");
452 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
453 + /* get packed fid data*/
454 + data1 = ext4_dentry_get_data(dir->i_sb,
455 + (struct ext4_dentry_param *) data1);
458 + memcpy(&de->name[2], data1, *(char *) data1);
459 + de->file_type |= EXT4_DIRENT_LUFID;
461 + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
462 + dot_reclen = cpu_to_le16(de->rec_len);
463 de = ext4_next_entry(de, blocksize);
464 de->inode = cpu_to_le32(dir->i_ino);
465 - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
466 + de->rec_len = ext4_rec_len_to_disk(blocksize - dot_reclen,
469 strcpy(de->name, "..");
470 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
471 + data2 = ext4_dentry_get_data(dir->i_sb,
472 + (struct ext4_dentry_param *) data2);
475 + memcpy(&de->name[3], data2, *(char *) data2);
476 + de->file_type |= EXT4_DIRENT_LUFID;
479 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
480 err = ext4_handle_dirty_metadata(handle, inode, dir_block);
481 @@ -2064,7 +2115,7 @@ retry:
485 - err = ext4_add_dot_dotdot(handle, dir, inode);
486 + err = ext4_add_dot_dotdot(handle, dir, inode, NULL, NULL);
488 goto out_clear_inode;
490 @@ -2103,7 +2154,7 @@ static int empty_dir(struct inode *inode
494 - if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
495 + if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2) ||
496 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
498 ext4_error(inode->i_sb,