1 This patch implements a feature which allows ext3 fs users (e.g. Lustre)
2 to store data in the ext3 dirent.
3 The data is stored in the ext3 dirent after the file name; this space is accounted for
4 in de->rec_len. The flag EXT3_DIRENT_LUFID is added to d_type if extra data is present.
7 Make use of dentry->d_fsdata to pass the fid to ext3, so no
8 changes to the ext3_add_entry() interface are required.
10 Index: b/fs/ext3/namei.c
11 ===================================================================
14 @@ -174,7 +174,8 @@ static unsigned dx_get_count (struct dx_
15 static unsigned dx_get_limit (struct dx_entry *entries);
16 static void dx_set_count (struct dx_entry *entries, unsigned value);
17 static void dx_set_limit (struct dx_entry *entries, unsigned value);
18 -static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
19 +static unsigned dx_root_limit (__u32 blocksize, struct ext3_dir_entry_2*,
21 static unsigned dx_node_limit (struct inode *dir);
22 static struct dx_frame *dx_probe(struct dentry *dentry,
24 @@ -204,11 +205,13 @@ static int ext3_dx_add_entry(handle_t *h
26 struct dx_root_info * dx_get_dx_info(struct ext3_dir_entry_2 *de)
28 + BUG_ON(de->name_len != 1);
30 /* get dotdot first */
31 de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
33 /* dx root info is after dotdot entry */
34 - de = (struct ext3_dir_entry_2 *)((char *)de + EXT3_DIR_REC_LEN(2));
35 + de = (struct ext3_dir_entry_2 *)((char *)de + EXT3_DIR_REC_LEN(de));
37 return (struct dx_root_info *) de;
39 @@ -253,16 +256,24 @@ static inline void dx_set_limit (struct
40 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
43 -static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
44 +static inline unsigned dx_root_limit (__u32 blocksize,
45 + struct ext3_dir_entry_2 * dot_de, unsigned infosize)
47 - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
48 - EXT3_DIR_REC_LEN(2) - infosize;
49 - return 0? 20: entry_space / sizeof(struct dx_entry);
50 + struct ext3_dir_entry_2 * dotdot_de;
51 + unsigned entry_space;
53 + BUG_ON(dot_de->name_len != 1);
54 + dotdot_de = (struct ext3_dir_entry_2 *)((char *)dot_de
55 + + le16_to_cpu(dot_de->rec_len));
56 + entry_space = blocksize - EXT3_DIR_REC_LEN(dot_de) -
57 + EXT3_DIR_REC_LEN(dotdot_de) - infosize;
59 + return entry_space / sizeof(struct dx_entry);
62 static inline unsigned dx_node_limit (struct inode *dir)
64 - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
65 + unsigned entry_space = dir->i_sb->s_blocksize - __EXT3_DIR_REC_LEN(0);
66 return 0? 22: entry_space / sizeof(struct dx_entry);
69 @@ -309,7 +320,7 @@ static struct stats dx_show_leaf(struct
70 printk(":%x.%u ", h.hash,
71 ((char *) de - base));
73 - space += EXT3_DIR_REC_LEN(de->name_len);
74 + space += EXT3_DIR_REC_LEN(de);
77 de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
78 @@ -411,7 +422,9 @@ dx_probe(struct dentry *dentry, struct i
80 entries = (struct dx_entry *) (((char *)info) + info->info_length);
82 - if (dx_get_limit(entries) != dx_root_limit(dir, info->info_length)) {
83 + if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize,
84 + (struct ext3_dir_entry_2*)bh->b_data,
85 + info->info_length)) {
86 ext3_warning(dir->i_sb, __FUNCTION__,
87 "dx entry: limit != root limit");
89 @@ -468,14 +481,17 @@ dx_probe(struct dentry *dentry, struct i
90 if (!indirect--) return frame;
91 if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
93 - at = entries = ((struct dx_node *) bh->b_data)->entries;
94 + entries = ((struct dx_node *) bh->b_data)->entries;
95 if (dx_get_limit(entries) != dx_node_limit (dir)) {
96 ext3_warning(dir->i_sb, __FUNCTION__,
97 - "dx entry: limit != node limit");
98 + "block %u(%lu): limit %u != node limit %u",
99 + dx_get_block(at), (long)bh->b_blocknr,
100 + dx_get_limit(entries), dx_node_limit(dir));
102 *err = ERR_BAD_DX_DIR;
109 @@ -608,7 +624,7 @@ static int htree_dirblock_to_tree(struct
110 de = (struct ext3_dir_entry_2 *) bh->b_data;
111 top = (struct ext3_dir_entry_2 *) ((char *) de +
112 dir->i_sb->s_blocksize -
113 - EXT3_DIR_REC_LEN(0));
114 + __EXT3_DIR_REC_LEN(0));
115 for (; de < top; de = ext3_next_entry(de)) {
116 if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
117 (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
118 @@ -1001,7 +1015,7 @@ static struct buffer_head * ext3_dx_find
120 de = (struct ext3_dir_entry_2 *) bh->b_data;
121 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
122 - EXT3_DIR_REC_LEN(0));
123 + __EXT3_DIR_REC_LEN(0));
124 for (; de < top; de = ext3_next_entry(de))
125 if (ext3_match (namelen, name, de)) {
126 if (!ext3_check_dir_entry("ext3_find_entry",
127 @@ -1183,7 +1197,7 @@ dx_move_dirents(char *from, char *to, st
130 struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
131 - rec_len = EXT3_DIR_REC_LEN(de->name_len);
132 + rec_len = EXT3_DIR_REC_LEN(de);
133 memcpy (to, de, rec_len);
134 ((struct ext3_dir_entry_2 *) to)->rec_len =
135 cpu_to_le16(rec_len);
136 @@ -1208,7 +1222,7 @@ static struct ext3_dir_entry_2* dx_pack_
137 next = (struct ext3_dir_entry_2 *) ((char *) de +
138 le16_to_cpu(de->rec_len));
139 if (de->inode && de->name_len) {
140 - rec_len = EXT3_DIR_REC_LEN(de->name_len);
141 + rec_len = EXT3_DIR_REC_LEN(de);
143 memmove(to, de, rec_len);
144 to->rec_len = cpu_to_le16(rec_len);
145 @@ -1334,12 +1348,18 @@ static int add_dirent_to_buf(handle_t *h
146 struct inode *dir = dentry->d_parent->d_inode;
147 const char *name = dentry->d_name.name;
148 int namelen = dentry->d_name.len;
149 + unsigned char *data;
150 unsigned long offset = 0;
151 unsigned short reclen;
152 - int nlen, rlen, err;
153 + int nlen, rlen, err, dlen = 0;
156 - reclen = EXT3_DIR_REC_LEN(namelen);
157 + data = ext3_dentry_get_data(inode->i_sb, (struct ext3_dentry_param *)
162 + reclen = __EXT3_DIR_REC_LEN(namelen + dlen);
164 de = (struct ext3_dir_entry_2 *)bh->b_data;
165 top = bh->b_data + dir->i_sb->s_blocksize - reclen;
166 @@ -1353,7 +1373,7 @@ static int add_dirent_to_buf(handle_t *h
170 - nlen = EXT3_DIR_REC_LEN(de->name_len);
171 + nlen = EXT3_DIR_REC_LEN(de);
172 rlen = le16_to_cpu(de->rec_len);
173 if ((de->inode? rlen - nlen: rlen) >= reclen)
175 @@ -1372,7 +1392,7 @@ static int add_dirent_to_buf(handle_t *h
178 /* By now the buffer is marked for journaling */
179 - nlen = EXT3_DIR_REC_LEN(de->name_len);
180 + nlen = EXT3_DIR_REC_LEN(de);
181 rlen = le16_to_cpu(de->rec_len);
183 struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
184 @@ -1388,6 +1408,11 @@ static int add_dirent_to_buf(handle_t *h
186 de->name_len = namelen;
187 memcpy (de->name, name, namelen);
189 + de->name[namelen] = 0;
190 + memcpy(&de->name[namelen + 1], data, *(char*) data);
191 + de->file_type |= EXT3_DIRENT_LUFID;
194 * XXX shouldn't update any times until successful
195 * completion of syscall, but too many callers depend
196 @@ -1474,7 +1499,8 @@ static int make_indexed_dir(handle_t *ha
197 entries = (void *)dx_info + sizeof(*dx_info);
198 dx_set_block (entries, 1);
199 dx_set_count (entries, 1);
200 - dx_set_limit (entries, dx_root_limit(dir, sizeof(*dx_info)));
201 + dx_set_limit (entries, dx_root_limit(dir->i_sb->s_blocksize,
202 + dot_de, sizeof(*dx_info)));
204 /* Initialize as for dx_probe */
205 hinfo.hash_version = dx_info->hash_version;
206 @@ -1502,6 +1528,8 @@ static int ext3_update_dotdot(handle_t *
207 struct buffer_head * dir_block;
208 struct ext3_dir_entry_2 * de;
209 int len, journal = 0, err = 0;
214 return PTR_ERR(handle);
215 @@ -1517,19 +1545,23 @@ static int ext3_update_dotdot(handle_t *
216 /* the first item must be "." */
217 assert(de->name_len == 1 && de->name[0] == '.');
218 len = le16_to_cpu(de->rec_len);
219 - assert(len >= EXT3_DIR_REC_LEN(1));
220 - if (len > EXT3_DIR_REC_LEN(1)) {
221 + assert(len >= __EXT3_DIR_REC_LEN(1));
222 + if (len > __EXT3_DIR_REC_LEN(1)) {
223 BUFFER_TRACE(dir_block, "get_write_access");
224 err = ext3_journal_get_write_access(handle, dir_block);
229 - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(1));
230 + de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de));
233 - len -= EXT3_DIR_REC_LEN(1);
234 - assert(len == 0 || len >= EXT3_DIR_REC_LEN(2));
235 + len -= EXT3_DIR_REC_LEN(de);
236 + data = ext3_dentry_get_data(dir->i_sb,
237 + (struct ext3_dentry_param *) dentry->d_fsdata);
240 + assert(len == 0 || len >= __EXT3_DIR_REC_LEN(2 + dlen));
241 de = (struct ext3_dir_entry_2 *)
242 ((char *) de + le16_to_cpu(de->rec_len));
244 @@ -1543,11 +1575,15 @@ static int ext3_update_dotdot(handle_t *
246 de->rec_len = cpu_to_le16(len);
248 - assert(le16_to_cpu(de->rec_len) >= EXT3_DIR_REC_LEN(2));
249 + assert(le16_to_cpu(de->rec_len) >= __EXT3_DIR_REC_LEN(2));
251 strcpy (de->name, "..");
252 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
256 + memcpy(&de->name[2 + 1], data, dlen);
257 + de->file_type |= EXT3_DIRENT_LUFID;
261 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
262 @@ -1958,12 +1994,12 @@ retry:
264 /* Initialize @inode as a subdirectory of @dir, and add the
265 * "." and ".." entries into the first directory block. */
266 -int ext3_add_dot_dotdot(handle_t *handle, struct inode * dir,
267 - struct inode *inode)
268 +int ext3_add_dot_dotdot(handle_t *handle, struct inode * dir, struct inode *inode,
269 + const void *data1, const void *data2)
271 struct buffer_head * dir_block;
272 struct ext3_dir_entry_2 * de;
274 + int err = 0, dot_reclen;
277 return PTR_ERR(handle);
278 @@ -1983,16 +2019,36 @@ int ext3_add_dot_dotdot(handle_t *handle
279 de = (struct ext3_dir_entry_2 *) dir_block->b_data;
280 de->inode = cpu_to_le32(inode->i_ino);
282 - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
283 - strcpy (de->name, ".");
284 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
285 + strcpy (de->name, ".");
287 + /* get packed fid data*/
288 + data1 = ext3_dentry_get_data(dir->i_sb,
289 + (struct ext3_dentry_param *) data1);
292 + memcpy(&de->name[2], data1, *(char*) data1);
293 + de->file_type |= EXT3_DIRENT_LUFID;
295 + de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de));
296 + dot_reclen = cpu_to_le16(de->rec_len);
298 de = (struct ext3_dir_entry_2 *)
299 ((char *) de + le16_to_cpu(de->rec_len));
300 de->inode = cpu_to_le32(dir->i_ino);
301 - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
302 + de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-dot_reclen);
304 strcpy (de->name, "..");
305 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
307 + /* get packed fid data*/
308 + data2 = ext3_dentry_get_data(dir->i_sb,
309 + (struct ext3_dentry_param *) data2);
312 + memcpy(&de->name[3], data2, *(char*) data2);
313 + de->file_type |= EXT3_DIRENT_LUFID;
316 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
317 ext3_journal_dirty_metadata(handle, dir_block);
318 @@ -2029,7 +2085,7 @@ retry:
322 - err = ext3_add_dot_dotdot(handle, dir, inode);
323 + err = ext3_add_dot_dotdot(handle, dir, inode, NULL, NULL);
326 ext3_mark_inode_dirty(handle, inode);
327 @@ -2067,7 +2123,7 @@ static int empty_dir (struct inode * ino
331 - if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
332 + if (inode->i_size < __EXT3_DIR_REC_LEN(1) + __EXT3_DIR_REC_LEN(2) ||
333 !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
335 ext3_error(inode->i_sb, __FUNCTION__,
336 Index: b/include/linux/ext3_fs.h
337 ===================================================================
338 --- a/include/linux/ext3_fs.h
339 +++ b/include/linux/ext3_fs.h
340 @@ -717,13 +717,16 @@ static inline int ext3_valid_inum(struct
341 #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
342 #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
343 #define EXT3_FEATURE_INCOMPAT_MMP 0x0100
344 +#define EXT3_FEATURE_INCOMPAT_DIRDATA 0x1000
346 #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
347 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
348 EXT3_FEATURE_INCOMPAT_RECOVER| \
349 EXT3_FEATURE_INCOMPAT_META_BG| \
350 EXT3_FEATURE_INCOMPAT_EXTENTS| \
351 - EXT3_FEATURE_INCOMPAT_MMP)
352 + EXT3_FEATURE_INCOMPAT_MMP| \
353 + EXT3_FEATURE_INCOMPAT_DIRDATA)
355 #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
356 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
357 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
358 @@ -790,7 +793,44 @@ struct ext3_dir_entry_2 {
359 #define EXT3_FT_SYMLINK 7
361 #define EXT3_FT_MAX 8
362 +#define EXT3_FT_MASK 0xf
364 +#if EXT3_FT_MAX > EXT3_FT_MASK
365 +#error "conflicting EXT3_FT_MAX and EXT3_FT_MASK"
368 +#define EXT3_LUFID_MAGIC 0xAD200907UL
369 +struct ext3_dentry_param {
370 + __u32 edp_magic; /* EXT3_LUFID_MAGIC */
371 + char edp_len; /* size of edp_data in bytes */
372 + char edp_data[0]; /* packed array of data */
373 +} __attribute__((packed));
375 +static inline unsigned char *ext3_dentry_get_data(struct super_block *sb,
376 + struct ext3_dentry_param* p)
379 + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_DIRDATA))
381 + if (p && p->edp_magic == EXT3_LUFID_MAGIC)
382 + return &p->edp_len;
388 + * d_type has 4 unused bits, so it can hold four types of data. These different
389 + * types of data (e.g. Lustre data, high 32 bits of 64-bit inode number) can be
390 + * stored, in flag order, after the file name in the ext3 dirent.
393 + * This flag is added to d_type if the ext3 dirent has extra data after the
394 + * filename. The data length is variable and is stored in the first byte
395 + * of the data. The data starts after the filename's NUL byte.
396 + * This is used by Lustre FS.
399 +#define EXT3_DIRENT_LUFID 0x10
401 * EXT3_DIR_PAD defines the directory entries boundaries
403 @@ -798,8 +838,12 @@ struct ext3_dir_entry_2 {
405 #define EXT3_DIR_PAD 4
406 #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
407 -#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
408 +#define __EXT3_DIR_REC_LEN(len) (((len) + 8 + EXT3_DIR_ROUND) & \
411 +#define EXT3_DIR_REC_LEN(de) (__EXT3_DIR_REC_LEN(de->name_len +\
412 + ext3_get_dirent_data_len(de)))
415 * Hash Tree Directory indexing
416 * (c) Daniel Phillips, 2001
417 @@ -1125,7 +1169,8 @@ extern struct buffer_head * ext3_find_en
418 struct ext3_dir_entry_2
420 extern int ext3_add_dot_dotdot(handle_t *handle, struct inode *dir,
421 - struct inode *inode);
422 + struct inode *inode, const void *data1,
423 + const void *data2);
424 extern struct inode_operations ext3_dir_inode_operations;
425 extern struct inode_operations ext3_special_inode_operations;
426 extern struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
427 @@ -1159,6 +1204,29 @@ ext3_get_blocks_wrap(handle_t *handle, s
432 + * Compute the total directory entry data length.
433 + * This includes the filename and an implicit NUL terminator (always present),
434 + * and optional extensions. Each extension has a bit set in the high 4 bits of
435 + * de->file_type, and the extension length is the first byte in each entry.
438 +static inline int ext3_get_dirent_data_len(struct ext3_dir_entry_2 *de)
440 + char *len = de->name + de->name_len + 1 /* NUL terminator */;
442 + __u8 extra_data_flags = (de->file_type & ~EXT3_FT_MASK) >> 4;
444 + while (extra_data_flags) {
445 + if (extra_data_flags & 1) {
446 + dlen += *len + (dlen == 0);
449 + extra_data_flags >>= 1;
454 #endif /* __KERNEL__ */
456 /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
457 Index: b/fs/ext3/dir.c
458 ===================================================================
461 @@ -53,11 +53,17 @@ const struct file_operations ext3_dir_op
463 static unsigned char get_dtype(struct super_block *sb, int filetype)
465 + int fl_index = filetype & EXT3_FT_MASK;
467 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
468 - (filetype >= EXT3_FT_MAX))
469 + (fl_index >= EXT3_FT_MAX))
472 - return (ext3_filetype_table[filetype]);
473 + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_DIRDATA))
474 + return (ext3_filetype_table[fl_index]);
476 + return (ext3_filetype_table[fl_index]) |
477 + (filetype & EXT3_DIRENT_LUFID);
481 @@ -69,11 +75,11 @@ int ext3_check_dir_entry (const char * f
482 const char * error_msg = NULL;
483 const int rlen = le16_to_cpu(de->rec_len);
485 - if (rlen < EXT3_DIR_REC_LEN(1))
486 + if (rlen < __EXT3_DIR_REC_LEN(1))
487 error_msg = "rec_len is smaller than minimal";
488 else if (rlen % 4 != 0)
489 error_msg = "rec_len % 4 != 0";
490 - else if (rlen < EXT3_DIR_REC_LEN(de->name_len))
491 + else if (rlen < EXT3_DIR_REC_LEN(de))
492 error_msg = "rec_len is too small for name_len";
493 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
494 error_msg = "directory entry across blocks";
495 @@ -177,7 +183,7 @@ revalidate:
496 * failure will be detected in the
497 * dirent test below. */
498 if (le16_to_cpu(de->rec_len) <
499 - EXT3_DIR_REC_LEN(1))
500 + __EXT3_DIR_REC_LEN(1))
502 i += le16_to_cpu(de->rec_len);
504 @@ -210,7 +216,6 @@ revalidate:
505 * during the copy operation.
507 unsigned long version = filp->f_version;
509 error = filldir(dirent, de->name,
512 @@ -342,13 +347,17 @@ int ext3_htree_store_dirent(struct file
513 struct rb_node **p, *parent = NULL;
514 struct fname * fname, *new_fn;
515 struct dir_private_info *info;
516 + int extra_data = 1;
519 info = (struct dir_private_info *) dir_file->private_data;
520 p = &info->root.rb_node;
522 /* Create and allocate the fname structure */
523 - len = sizeof(struct fname) + dirent->name_len + 1;
524 + if (dirent->file_type & EXT3_DIRENT_LUFID)
525 + extra_data = ext3_get_dirent_data_len(dirent);
527 + len = sizeof(struct fname) + dirent->name_len + extra_data;
528 new_fn = kmalloc(len, GFP_KERNEL);
531 @@ -358,7 +367,7 @@ int ext3_htree_store_dirent(struct file
532 new_fn->inode = le32_to_cpu(dirent->inode);
533 new_fn->name_len = dirent->name_len;
534 new_fn->file_type = dirent->file_type;
535 - memcpy(new_fn->name, dirent->name, dirent->name_len);
536 + memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data);
537 new_fn->name[dirent->name_len] = 0;