1 # This is a BitKeeper generated patch for the following project:
2 # Project Name: Linux kernel tree
3 # This patch format is intended for GNU patch command version 2.5 or higher.
4 # This patch includes the following deltas:
5 # ChangeSet 1.809 -> 1.810
6 # fs/ext3/Makefile 1.4 -> 1.5
7 # include/linux/ext3_jbd.h 1.5 -> 1.6
8 # fs/ext3/ialloc.c 1.17 -> 1.18
9 # fs/ext3/symlink.c 1.3 -> 1.4
10 # fs/Makefile 1.42 -> 1.43
11 # fs/ext3/namei.c 1.22 -> 1.23
12 # include/linux/ext3_fs.h 1.11 -> 1.12
13 # fs/Config.in 1.39 -> 1.40
14 # fs/ext3/inode.c 1.42 -> 1.43
15 # fs/Config.help 1.21 -> 1.22
16 # fs/ext3/super.c 1.33 -> 1.34
17 # fs/ext3/file.c 1.9 -> 1.10
18 # (new) -> 1.1 fs/ext3/xattr.h
19 # (new) -> 1.1 include/linux/mbcache.h
20 # (new) -> 1.1 fs/ext3/xattr.c
21 # (new) -> 1.1 fs/mbcache.c
22 # (new) -> 1.1 fs/ext3/xattr_user.c
24 # The following is the BitKeeper ChangeSet Log
25 # --------------------------------------------
26 # 02/10/20 braam@clusterfs.com 1.810
27 # xattrs for UML bk repository
28 # --------------------------------------------
30 diff -Nru a/fs/Config.help b/fs/Config.help
31 --- a/fs/Config.help Sun Dec 8 02:49:56 2002
32 +++ b/fs/Config.help Sun Dec 8 02:49:56 2002
34 of your root partition (the one containing the directory /) cannot
35 be compiled as a module, and so this may be dangerous.
38 + Extended attributes are name:value pairs associated with inodes by
39 + the kernel or by users (see the attr(5) manual page, or visit
40 + <http://acl.bestbits.at/> for details).
45 This is a generic journaling layer for block devices. It is
46 currently used by the ext3 file system, but it could also be used to
47 diff -Nru a/fs/Config.in b/fs/Config.in
48 --- a/fs/Config.in Sun Dec 8 02:49:56 2002
49 +++ b/fs/Config.in Sun Dec 8 02:49:56 2002
51 dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
53 tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS
54 +dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS
55 # CONFIG_JBD could be its own option (even modular), but until there are
56 # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS
57 # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS
59 define_tristate CONFIG_ZISOFS_FS $CONFIG_ISO9660_FS
61 define_tristate CONFIG_ZISOFS_FS n
64 +# Meta block cache for Extended Attributes (ext2/ext3)
65 +if [ "$CONFIG_EXT2_FS_XATTR" = "y" -o "$CONFIG_EXT3_FS_XATTR" = "y" ]; then
66 + if [ "$CONFIG_EXT2_FS" = "y" -o "$CONFIG_EXT3_FS" = "y" ]; then
67 + define_tristate CONFIG_FS_MBCACHE y
69 + if [ "$CONFIG_EXT2_FS" = "m" -o "$CONFIG_EXT3_FS" = "m" ]; then
70 + define_tristate CONFIG_FS_MBCACHE m
75 mainmenu_option next_comment
76 diff -Nru a/fs/Makefile b/fs/Makefile
77 --- a/fs/Makefile Sun Dec 8 02:49:56 2002
78 +++ b/fs/Makefile Sun Dec 8 02:49:56 2002
82 export-objs := open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \
83 - fcntl.o read_write.o dcookies.o
84 + fcntl.o read_write.o dcookies.o mbcache.o
86 obj-y := open.o read_write.o devices.o file_table.o buffer.o \
87 bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \
89 obj-y += binfmt_script.o
91 obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
93 +obj-$(CONFIG_FS_MBCACHE) += mbcache.o
95 obj-$(CONFIG_QUOTA) += dquot.o
96 obj-$(CONFIG_QFMT_V1) += quota_v1.o
97 diff -Nru a/fs/ext3/Makefile b/fs/ext3/Makefile
98 --- a/fs/ext3/Makefile Sun Dec 8 02:49:56 2002
99 +++ b/fs/ext3/Makefile Sun Dec 8 02:49:56 2002
101 ext3-objs := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
102 ioctl.o namei.o super.o symlink.o hash.o
104 +export-objs += xattr.o
106 +ifeq ($(CONFIG_EXT3_FS_XATTR),y)
107 +ext3-objs += xattr.o xattr_user.o
110 include $(TOPDIR)/Rules.make
111 diff -Nru a/fs/ext3/file.c b/fs/ext3/file.c
112 --- a/fs/ext3/file.c Sun Dec 8 02:49:56 2002
113 +++ b/fs/ext3/file.c Sun Dec 8 02:49:56 2002
115 #include <linux/jbd.h>
116 #include <linux/ext3_fs.h>
117 #include <linux/ext3_jbd.h>
118 -#include <linux/smp_lock.h>
122 * Called when an inode is released. Note that this is different
124 struct inode_operations ext3_file_inode_operations = {
125 .truncate = ext3_truncate,
126 .setattr = ext3_setattr,
127 + .setxattr = ext3_setxattr,
128 + .getxattr = ext3_getxattr,
129 + .listxattr = ext3_listxattr,
130 + .removexattr = ext3_removexattr,
133 diff -Nru a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
134 --- a/fs/ext3/ialloc.c Sun Dec 8 02:49:56 2002
135 +++ b/fs/ext3/ialloc.c Sun Dec 8 02:49:56 2002
137 #include <asm/bitops.h>
138 #include <asm/byteorder.h>
143 * ialloc.c contains the inodes allocation and deallocation routines
146 * as writing the quota to disk may need the lock as well.
149 + ext3_xattr_delete_inode(handle, inode);
150 DQUOT_FREE_INODE(inode);
153 diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c
154 --- a/fs/ext3/inode.c Sun Dec 8 02:49:56 2002
155 +++ b/fs/ext3/inode.c Sun Dec 8 02:49:56 2002
158 #undef SEARCH_FROM_ZERO
161 + * Test whether an inode is a fast symlink.
163 +static inline int ext3_inode_is_fast_symlink(struct inode *inode)
165 + int ea_blocks = EXT3_I(inode)->i_file_acl ?
166 + (inode->i_sb->s_blocksize >> 9) : 0;
168 + return (S_ISLNK(inode->i_mode) &&
169 + inode->i_blocks - ea_blocks == 0);
172 /* The ext3 forget function must perform a revoke if we are freeing data
173 * which has been journaled. Metadata (eg. indirect blocks) must be
174 * revoked in all cases.
176 * still needs to be revoked.
179 -static int ext3_forget(handle_t *handle, int is_metadata,
180 +int ext3_forget(handle_t *handle, int is_metadata,
181 struct inode *inode, struct buffer_head *bh,
188 - if (is_bad_inode(inode) ||
189 - inode->i_ino == EXT3_ACL_IDX_INO ||
190 - inode->i_ino == EXT3_ACL_DATA_INO)
191 + if (is_bad_inode(inode))
195 @@ -1979,6 +1989,8 @@
196 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
197 S_ISLNK(inode->i_mode)))
199 + if (ext3_inode_is_fast_symlink(inode))
201 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
204 @@ -2130,8 +2142,6 @@
205 struct ext3_group_desc * gdp;
207 if ((inode->i_ino != EXT3_ROOT_INO &&
208 - inode->i_ino != EXT3_ACL_IDX_INO &&
209 - inode->i_ino != EXT3_ACL_DATA_INO &&
210 inode->i_ino != EXT3_JOURNAL_INO &&
211 inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
212 inode->i_ino > le32_to_cpu(
213 @@ -2263,10 +2273,7 @@
217 - if (inode->i_ino == EXT3_ACL_IDX_INO ||
218 - inode->i_ino == EXT3_ACL_DATA_INO)
219 - /* Nothing to do */ ;
220 - else if (S_ISREG(inode->i_mode)) {
221 + if (S_ISREG(inode->i_mode)) {
222 inode->i_op = &ext3_file_inode_operations;
223 inode->i_fop = &ext3_file_operations;
224 if (ext3_should_writeback_data(inode))
225 @@ -2277,18 +2284,20 @@
226 inode->i_op = &ext3_dir_inode_operations;
227 inode->i_fop = &ext3_dir_operations;
228 } else if (S_ISLNK(inode->i_mode)) {
229 - if (!inode->i_blocks)
230 + if (ext3_inode_is_fast_symlink(inode))
231 inode->i_op = &ext3_fast_symlink_inode_operations;
233 - inode->i_op = &page_symlink_inode_operations;
234 + inode->i_op = &ext3_symlink_inode_operations;
235 if (ext3_should_writeback_data(inode))
236 inode->i_mapping->a_ops = &ext3_writeback_aops;
238 inode->i_mapping->a_ops = &ext3_aops;
242 + inode->i_op = &ext3_special_inode_operations;
243 init_special_inode(inode, inode->i_mode,
244 le32_to_cpu(iloc.raw_inode->i_block[0]));
246 if (ei->i_flags & EXT3_SYNC_FL)
247 inode->i_flags |= S_SYNC;
248 if (ei->i_flags & EXT3_APPEND_FL)
249 diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c
250 --- a/fs/ext3/namei.c Sun Dec 8 02:49:56 2002
251 +++ b/fs/ext3/namei.c Sun Dec 8 02:49:56 2002
253 #include <linux/quotaops.h>
254 #include <linux/buffer_head.h>
255 #include <linux/smp_lock.h>
260 @@ -1654,7 +1655,7 @@
264 - inode = ext3_new_inode (handle, dir, S_IFDIR);
265 + inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
266 err = PTR_ERR(inode);
269 @@ -1662,7 +1663,6 @@
270 inode->i_op = &ext3_dir_inode_operations;
271 inode->i_fop = &ext3_dir_operations;
272 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
273 - inode->i_blocks = 0;
274 dir_block = ext3_bread (handle, inode, 0, 1, &err);
276 inode->i_nlink--; /* is this nlink == 0? */
277 @@ -1689,9 +1689,6 @@
278 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
279 ext3_journal_dirty_metadata(handle, dir_block);
281 - inode->i_mode = S_IFDIR | mode;
282 - if (dir->i_mode & S_ISGID)
283 - inode->i_mode |= S_ISGID;
284 ext3_mark_inode_dirty(handle, inode);
285 err = ext3_add_entry (handle, dentry, inode);
287 @@ -2068,7 +2065,7 @@
290 if (l > sizeof (EXT3_I(inode)->i_data)) {
291 - inode->i_op = &page_symlink_inode_operations;
292 + inode->i_op = &ext3_symlink_inode_operations;
293 if (ext3_should_writeback_data(inode))
294 inode->i_mapping->a_ops = &ext3_writeback_aops;
296 @@ -2284,4 +2281,17 @@
299 .rename = ext3_rename,
300 + .setxattr = ext3_setxattr,
301 + .getxattr = ext3_getxattr,
302 + .listxattr = ext3_listxattr,
303 + .removexattr = ext3_removexattr,
306 +struct inode_operations ext3_special_inode_operations = {
307 + .setxattr = ext3_setxattr,
308 + .getxattr = ext3_getxattr,
309 + .listxattr = ext3_listxattr,
310 + .removexattr = ext3_removexattr,
314 diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c
315 --- a/fs/ext3/super.c Sun Dec 8 02:49:56 2002
316 +++ b/fs/ext3/super.c Sun Dec 8 02:49:56 2002
318 #include <linux/smp_lock.h>
319 #include <linux/buffer_head.h>
320 #include <asm/uaccess.h>
323 #ifdef CONFIG_JBD_DEBUG
324 static int ext3_ro_after; /* Make fs read-only after this many jiffies */
326 struct ext3_super_block *es = sbi->s_es;
329 + ext3_xattr_put_super(sb);
330 journal_destroy(sbi->s_journal);
331 if (!(sb->s_flags & MS_RDONLY)) {
332 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
336 unsigned long *mount_options = &sbi->s_mount_opt;
338 uid_t *resuid = &sbi->s_resuid;
339 gid_t *resgid = &sbi->s_resgid;
343 if ((value = strchr (this_char, '=')) != NULL)
345 +#ifdef CONFIG_EXT3_FS_XATTR
346 + if (!strcmp (this_char, "user_xattr"))
347 + set_opt (*mount_options, XATTR_USER);
348 + else if (!strcmp (this_char, "nouser_xattr"))
349 + clear_opt (*mount_options, XATTR_USER);
352 if (!strcmp (this_char, "bsddf"))
353 clear_opt (*mount_options, MINIX_DF);
354 else if (!strcmp (this_char, "nouid32")) {
356 sbi->s_mount_opt = 0;
357 sbi->s_resuid = EXT3_DEF_RESUID;
358 sbi->s_resgid = EXT3_DEF_RESGID;
360 + /* Default extended attribute flags */
361 +#ifdef CONFIG_EXT3_FS_XATTR
362 + set_opt(sbi->s_mount_opt, XATTR_USER);
365 if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0))
368 @@ -1820,7 +1836,10 @@
370 static int __init init_ext3_fs(void)
372 - int err = init_inodecache();
373 + int err = init_ext3_xattr();
376 + err = init_inodecache();
379 err = register_filesystem(&ext3_fs_type);
380 @@ -1830,6 +1849,7 @@
382 destroy_inodecache();
388 @@ -1837,6 +1857,7 @@
390 unregister_filesystem(&ext3_fs_type);
391 destroy_inodecache();
395 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
396 diff -Nru a/fs/ext3/symlink.c b/fs/ext3/symlink.c
397 --- a/fs/ext3/symlink.c Sun Dec 8 02:49:56 2002
398 +++ b/fs/ext3/symlink.c Sun Dec 8 02:49:56 2002
400 #include <linux/fs.h>
401 #include <linux/jbd.h>
402 #include <linux/ext3_fs.h>
405 static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen)
408 return vfs_follow_link(nd, (char*)ei->i_data);
411 +struct inode_operations ext3_symlink_inode_operations = {
412 + .readlink = page_readlink,
413 + .follow_link = page_follow_link,
414 + .setxattr = ext3_setxattr,
415 + .getxattr = ext3_getxattr,
416 + .listxattr = ext3_listxattr,
417 + .removexattr = ext3_removexattr,
420 struct inode_operations ext3_fast_symlink_inode_operations = {
421 - .readlink = ext3_readlink, /* BKL not held. Don't need */
422 + .readlink = ext3_readlink, /* BKL not held. Don't need */
423 .follow_link = ext3_follow_link, /* BKL not held. Don't need */
424 + .setxattr = ext3_setxattr,
425 + .getxattr = ext3_getxattr,
426 + .listxattr = ext3_listxattr,
427 + .removexattr = ext3_removexattr,
429 diff -Nru a/fs/ext3/xattr.c b/fs/ext3/xattr.c
430 --- /dev/null Wed Dec 31 16:00:00 1969
431 +++ b/fs/ext3/xattr.c Sun Dec 8 02:49:56 2002
434 + * linux/fs/ext3/xattr.c
436 + * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
438 + * Fix by Harrison Xing <harrison@mountainviewdata.com>.
439 + * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
440 + * Extended attributes for symlinks and special files added per
441 + * suggestion of Luka Renko <luka.renko@hermes.si>.
445 + * Extended attributes are stored on disk blocks allocated outside of
446 + * any inode. The i_file_acl field is then made to point to this allocated
447 + * block. If all extended attributes of an inode are identical, these
448 + * inodes may share the same extended attribute block. Such situations
449 + * are automatically detected by keeping a cache of recent attribute block
450 + * numbers and hashes over the block's contents in memory.
453 + * Extended attribute block layout:
455 + * +------------------+
458 + * | entry 2 | | growing downwards
460 + * | four null bytes |
463 + * | value 3 | | growing upwards
465 + * +------------------+
467 + * The block header is followed by multiple entry descriptors. These entry
468 + * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
469 + * byte boundaries. The entry descriptors are sorted by attribute name,
470 + * so that two extended attribute blocks can be compared efficiently.
472 + * Attribute values are aligned to the end of the block, stored in
473 + * no specific order. They are also padded to EXT3_XATTR_PAD byte
474 + * boundaries. No additional gaps are left between them.
478 + * The VFS holds inode->i_sem semaphore when any of the xattr inode
479 + * operations are called, so we are guaranteed that only one
480 + * process accesses extended attributes of an inode at any time.
482 + * For writing we also grab the ext3_xattr_sem semaphore. This ensures that
483 + * only a single process is modifying an extended attribute block, even
484 + * if the block is shared among inodes.
487 +#include <linux/init.h>
488 +#include <linux/fs.h>
489 +#include <linux/slab.h>
490 +#include <linux/ext3_jbd.h>
491 +#include <linux/ext3_fs.h>
492 +#include <linux/mbcache.h>
493 +#include <linux/quotaops.h>
494 +#include <asm/semaphore.h>
497 +#define EXT3_EA_USER "user."
499 +#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
500 +#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
501 +#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
502 +#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
504 +#ifdef EXT3_XATTR_DEBUG
505 +# define ea_idebug(inode, f...) do { \
506 + printk(KERN_DEBUG "inode %s:%ld: ", \
507 + kdevname(inode->i_dev), inode->i_ino); \
511 +# define ea_bdebug(bh, f...) do { \
512 + printk(KERN_DEBUG "block %s:%ld: ", \
513 + kdevname(bh->b_dev), bh->b_blocknr); \
518 +# define ea_idebug(f...)
519 +# define ea_bdebug(f...)
522 +static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *,
523 + struct ext3_xattr_header *);
525 +static int ext3_xattr_cache_insert(struct buffer_head *);
526 +static struct buffer_head *ext3_xattr_cache_find(struct inode *,
527 + struct ext3_xattr_header *);
528 +static void ext3_xattr_cache_remove(struct buffer_head *);
529 +static void ext3_xattr_rehash(struct ext3_xattr_header *,
530 + struct ext3_xattr_entry *);
532 +static struct mb_cache *ext3_xattr_cache;
535 + * If a file system does not share extended attributes among inodes,
536 + * we should not need the ext3_xattr_sem semaphore. However, the
537 + * filesystem may still contain shared blocks, so we always take
541 +static DECLARE_MUTEX(ext3_xattr_sem);
542 +static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
543 +static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
546 +ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
548 + int error = -EINVAL;
550 + if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
551 + write_lock(&ext3_handler_lock);
552 + if (!ext3_xattr_handlers[name_index-1]) {
553 + ext3_xattr_handlers[name_index-1] = handler;
556 + write_unlock(&ext3_handler_lock);
562 +ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
564 + if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) {
565 + write_lock(&ext3_handler_lock);
566 + ext3_xattr_handlers[name_index-1] = NULL;
567 + write_unlock(&ext3_handler_lock);
571 +static inline const char *
572 +strcmp_prefix(const char *a, const char *a_prefix)
574 + while (*a_prefix && *a == *a_prefix) {
578 + return *a_prefix ? NULL : a;
582 + * Decode the extended attribute name, and translate it into
583 + * the name_index and name suffix.
585 +static inline struct ext3_xattr_handler *
586 +ext3_xattr_resolve_name(const char **name)
588 + struct ext3_xattr_handler *handler = NULL;
593 + read_lock(&ext3_handler_lock);
594 + for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
595 + if (ext3_xattr_handlers[i]) {
596 + const char *n = strcmp_prefix(*name,
597 + ext3_xattr_handlers[i]->prefix);
599 + handler = ext3_xattr_handlers[i];
605 + read_unlock(&ext3_handler_lock);
609 +static inline struct ext3_xattr_handler *
610 +ext3_xattr_handler(int name_index)
612 + struct ext3_xattr_handler *handler = NULL;
613 + if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
614 + read_lock(&ext3_handler_lock);
615 + handler = ext3_xattr_handlers[name_index-1];
616 + read_unlock(&ext3_handler_lock);
622 + * Inode operation getxattr()
624 + * dentry->d_inode->i_sem down
627 +ext3_getxattr(struct dentry *dentry, const char *name,
628 + void *buffer, size_t size)
630 + struct ext3_xattr_handler *handler;
631 + struct inode *inode = dentry->d_inode;
633 + handler = ext3_xattr_resolve_name(&name);
635 + return -EOPNOTSUPP;
636 + return handler->get(inode, name, buffer, size);
640 + * Inode operation listxattr()
642 + * dentry->d_inode->i_sem down
645 +ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
647 + return ext3_xattr_list(dentry->d_inode, buffer, size);
651 + * Inode operation setxattr()
653 + * dentry->d_inode->i_sem down
656 +ext3_setxattr(struct dentry *dentry, const char *name,
657 + void *value, size_t size, int flags)
659 + struct ext3_xattr_handler *handler;
660 + struct inode *inode = dentry->d_inode;
663 + value = ""; /* empty EA, do not remove */
664 + handler = ext3_xattr_resolve_name(&name);
666 + return -EOPNOTSUPP;
667 + return handler->set(inode, name, value, size, flags);
671 + * Inode operation removexattr()
673 + * dentry->d_inode->i_sem down
676 +ext3_removexattr(struct dentry *dentry, const char *name)
678 + struct ext3_xattr_handler *handler;
679 + struct inode *inode = dentry->d_inode;
681 + handler = ext3_xattr_resolve_name(&name);
683 + return -EOPNOTSUPP;
684 + return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
690 + * Copy an extended attribute into the buffer
691 + * provided, or compute the buffer size required.
692 + * Buffer is NULL to compute the size of the buffer required.
694 + * Returns a negative error number on failure, or the number of bytes
695 + * used / required on success.
698 +ext3_xattr_get(struct inode *inode, int name_index, const char *name,
699 + void *buffer, size_t buffer_size)
701 + struct buffer_head *bh = NULL;
702 + struct ext3_xattr_entry *entry;
703 + unsigned int block, size;
705 + int name_len, error;
707 + ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
708 + name_index, name, buffer, (long)buffer_size);
712 + if (!EXT3_I(inode)->i_file_acl)
714 + block = EXT3_I(inode)->i_file_acl;
715 + ea_idebug(inode, "reading block %d", block);
716 + bh = sb_bread(inode->i_sb, block);
719 + ea_bdebug(bh, "b_count=%d, refcount=%d",
720 + atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
721 + end = bh->b_data + bh->b_size;
722 + if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
723 + HDR(bh)->h_blocks != cpu_to_le32(1)) {
724 +bad_block: ext3_error(inode->i_sb, "ext3_xattr_get",
725 + "inode %ld: bad block %d", inode->i_ino, block);
729 + /* find named attribute */
730 + name_len = strlen(name);
733 + if (name_len > 255)
735 + entry = FIRST_ENTRY(bh);
736 + while (!IS_LAST_ENTRY(entry)) {
737 + struct ext3_xattr_entry *next =
738 + EXT3_XATTR_NEXT(entry);
739 + if ((char *)next >= end)
741 + if (name_index == entry->e_name_index &&
742 + name_len == entry->e_name_len &&
743 + memcmp(name, entry->e_name, name_len) == 0)
747 + /* Check the remaining name entries */
748 + while (!IS_LAST_ENTRY(entry)) {
749 + struct ext3_xattr_entry *next =
750 + EXT3_XATTR_NEXT(entry);
751 + if ((char *)next >= end)
755 + if (ext3_xattr_cache_insert(bh))
756 + ea_idebug(inode, "cache insert failed");
760 + /* check the buffer size */
761 + if (entry->e_value_block != 0)
763 + size = le32_to_cpu(entry->e_value_size);
764 + if (size > inode->i_sb->s_blocksize ||
765 + le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
768 + if (ext3_xattr_cache_insert(bh))
769 + ea_idebug(inode, "cache insert failed");
772 + if (size > buffer_size)
774 + /* return value of attribute */
775 + memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
787 + * ext3_xattr_list()
789 + * Copy a list of attribute names into the buffer
790 + * provided, or compute the buffer size required.
791 + * Buffer is NULL to compute the size of the buffer required.
793 + * Returns a negative error number on failure, or the number of bytes
794 + * used / required on success.
797 +ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
799 + struct buffer_head *bh = NULL;
800 + struct ext3_xattr_entry *entry;
801 + unsigned int block, size = 0;
805 + ea_idebug(inode, "buffer=%p, buffer_size=%ld",
806 + buffer, (long)buffer_size);
808 + if (!EXT3_I(inode)->i_file_acl)
810 + block = EXT3_I(inode)->i_file_acl;
811 + ea_idebug(inode, "reading block %d", block);
812 + bh = sb_bread(inode->i_sb, block);
815 + ea_bdebug(bh, "b_count=%d, refcount=%d",
816 + atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
817 + end = bh->b_data + bh->b_size;
818 + if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
819 + HDR(bh)->h_blocks != cpu_to_le32(1)) {
820 +bad_block: ext3_error(inode->i_sb, "ext3_xattr_list",
821 + "inode %ld: bad block %d", inode->i_ino, block);
825 + /* compute the size required for the list of attribute names */
826 + for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
827 + entry = EXT3_XATTR_NEXT(entry)) {
828 + struct ext3_xattr_handler *handler;
829 + struct ext3_xattr_entry *next =
830 + EXT3_XATTR_NEXT(entry);
831 + if ((char *)next >= end)
834 + handler = ext3_xattr_handler(entry->e_name_index);
836 + size += handler->list(NULL, inode, entry->e_name,
837 + entry->e_name_len) + 1;
841 + if (ext3_xattr_cache_insert(bh))
842 + ea_idebug(inode, "cache insert failed");
848 + if (size > buffer_size)
852 + /* list the attribute names */
854 + for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
855 + entry = EXT3_XATTR_NEXT(entry)) {
856 + struct ext3_xattr_handler *handler;
858 + handler = ext3_xattr_handler(entry->e_name_index);
860 + buf += handler->list(buf, inode, entry->e_name,
861 + entry->e_name_len);
874 + * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
877 +static void ext3_xattr_update_super_block(handle_t *handle,
878 + struct super_block *sb)
880 + if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
884 + ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
885 + EXT3_SB(sb)->s_es->s_feature_compat |=
886 + cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
888 + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
895 + * Create, replace or remove an extended attribute for this inode. Buffer
896 + * is NULL to remove an existing extended attribute, and non-NULL to
897 + * either replace an existing extended attribute, or create a new extended
898 + * attribute. The flags XATTR_REPLACE and XATTR_CREATE
899 + * specify that an extended attribute must exist and must not exist
900 + * previous to the call, respectively.
902 + * Returns 0, or a negative error number on failure.
905 +ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
906 + const char *name, const void *value, size_t value_len, int flags)
908 + struct super_block *sb = inode->i_sb;
909 + struct buffer_head *bh = NULL;
910 + struct ext3_xattr_header *header = NULL;
911 + struct ext3_xattr_entry *here, *last;
912 + unsigned int name_len;
913 + int min_offs = sb->s_blocksize, not_found = 1, free, error;
917 + * header -- Points either into bh, or to a temporarily
918 + * allocated buffer.
919 + * here -- The named entry found, or the place for inserting, within
920 + * the block pointed to by header.
921 + * last -- Points right after the last named entry within the block
922 + * pointed to by header.
923 + * min_offs -- The offset of the first value (values are aligned
924 + * towards the end of the block).
925 + * end -- Points right after the block pointed to by header.
928 + ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
929 + name_index, name, value, (long)value_len);
931 + if (IS_RDONLY(inode))
933 + if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
939 + name_len = strlen(name);
940 + if (name_len > 255 || value_len > sb->s_blocksize)
942 + down(&ext3_xattr_sem);
944 + if (EXT3_I(inode)->i_file_acl) {
945 + /* The inode already has an extended attribute block. */
946 + int block = EXT3_I(inode)->i_file_acl;
948 + bh = sb_bread(sb, block);
952 + ea_bdebug(bh, "b_count=%d, refcount=%d",
953 + atomic_read(&(bh->b_count)),
954 + le32_to_cpu(HDR(bh)->h_refcount));
956 + end = bh->b_data + bh->b_size;
957 + if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
958 + header->h_blocks != cpu_to_le32(1)) {
959 +bad_block: ext3_error(sb, "ext3_xattr_set",
960 + "inode %ld: bad block %d", inode->i_ino, block);
964 + /* Find the named attribute. */
965 + here = FIRST_ENTRY(bh);
966 + while (!IS_LAST_ENTRY(here)) {
967 + struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
968 + if ((char *)next >= end)
970 + if (!here->e_value_block && here->e_value_size) {
971 + int offs = le16_to_cpu(here->e_value_offs);
972 + if (offs < min_offs)
975 + not_found = name_index - here->e_name_index;
977 + not_found = name_len - here->e_name_len;
979 + not_found = memcmp(name, here->e_name,name_len);
980 + if (not_found <= 0)
985 + /* We still need to compute min_offs and last. */
986 + while (!IS_LAST_ENTRY(last)) {
987 + struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
988 + if ((char *)next >= end)
990 + if (!last->e_value_block && last->e_value_size) {
991 + int offs = le16_to_cpu(last->e_value_offs);
992 + if (offs < min_offs)
998 + /* Check whether we have enough space left. */
999 + free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
1001 + /* We will use a new extended attribute block. */
1002 + free = sb->s_blocksize -
1003 + sizeof(struct ext3_xattr_header) - sizeof(__u32);
1004 + here = last = NULL; /* avoid gcc uninitialized warning. */
1008 + /* Request to remove a nonexistent attribute? */
1010 + if (flags & XATTR_REPLACE)
1013 + if (value == NULL)
1016 + free -= EXT3_XATTR_LEN(name_len);
1018 + /* Request to create an existing attribute? */
1020 + if (flags & XATTR_CREATE)
1022 + if (!here->e_value_block && here->e_value_size) {
1023 + unsigned int size = le32_to_cpu(here->e_value_size);
1025 + if (le16_to_cpu(here->e_value_offs) + size >
1026 + sb->s_blocksize || size > sb->s_blocksize)
1028 + free += EXT3_XATTR_SIZE(size);
1031 + free -= EXT3_XATTR_SIZE(value_len);
1036 + /* Here we know that we can set the new attribute. */
1039 + if (header->h_refcount == cpu_to_le32(1)) {
1040 + ea_bdebug(bh, "modifying in-place");
1041 + ext3_xattr_cache_remove(bh);
1042 + error = ext3_journal_get_write_access(handle, bh);
1048 + ea_bdebug(bh, "cloning");
1049 + header = kmalloc(bh->b_size, GFP_KERNEL);
1051 + if (header == NULL)
1053 + memcpy(header, HDR(bh), bh->b_size);
1054 + header->h_refcount = cpu_to_le32(1);
1055 + offset = (char *)header - bh->b_data;
1056 + here = ENTRY((char *)here + offset);
1057 + last = ENTRY((char *)last + offset);
1060 + /* Allocate a buffer where we construct the new block. */
1061 + header = kmalloc(sb->s_blocksize, GFP_KERNEL);
1063 + if (header == NULL)
1065 + memset(header, 0, sb->s_blocksize);
1066 + end = (char *)header + sb->s_blocksize;
1067 + header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
1068 + header->h_blocks = header->h_refcount = cpu_to_le32(1);
1069 + last = here = ENTRY(header+1);
1073 + /* Insert the new name. */
1074 + int size = EXT3_XATTR_LEN(name_len);
1075 + int rest = (char *)last - (char *)here;
1076 + memmove((char *)here + size, here, rest);
1077 + memset(here, 0, size);
1078 + here->e_name_index = name_index;
1079 + here->e_name_len = name_len;
1080 + memcpy(here->e_name, name, name_len);
1082 + /* Remove the old value. */
1083 + if (!here->e_value_block && here->e_value_size) {
1084 + char *first_val = (char *)header + min_offs;
1085 + int offs = le16_to_cpu(here->e_value_offs);
1086 + char *val = (char *)header + offs;
1087 + size_t size = EXT3_XATTR_SIZE(
1088 + le32_to_cpu(here->e_value_size));
1089 + memmove(first_val + size, first_val, val - first_val);
1090 + memset(first_val, 0, size);
1091 + here->e_value_offs = 0;
1094 + /* Adjust all value offsets. */
1095 + last = ENTRY(header+1);
1096 + while (!IS_LAST_ENTRY(last)) {
1097 + int o = le16_to_cpu(last->e_value_offs);
1098 + if (!last->e_value_block && o < offs)
1099 + last->e_value_offs =
1100 + cpu_to_le16(o + size);
1101 + last = EXT3_XATTR_NEXT(last);
1104 + if (value == NULL) {
1105 + /* Remove this attribute. */
1106 + if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) {
1107 + /* This block is now empty. */
1108 + error = ext3_xattr_set2(handle, inode, bh,NULL);
1111 + /* Remove the old name. */
1112 + int size = EXT3_XATTR_LEN(name_len);
1113 + last = ENTRY((char *)last - size);
1114 + memmove(here, (char*)here + size,
1115 + (char*)last - (char*)here);
1116 + memset(last, 0, size);
1121 + if (value != NULL) {
1122 + /* Insert the new value. */
1123 + here->e_value_size = cpu_to_le32(value_len);
1125 + size_t size = EXT3_XATTR_SIZE(value_len);
1126 + char *val = (char *)header + min_offs - size;
1127 + here->e_value_offs =
1128 + cpu_to_le16((char *)val - (char *)header);
1129 + memset(val + size - EXT3_XATTR_PAD, 0,
1130 + EXT3_XATTR_PAD); /* Clear the pad bytes. */
1131 + memcpy(val, value, value_len);
1134 + ext3_xattr_rehash(header, here);
1136 + error = ext3_xattr_set2(handle, inode, bh, header);
1140 + if (!(bh && header == HDR(bh)))
1142 + up(&ext3_xattr_sem);
1148 + * Second half of ext3_xattr_set(): Update the file system.
1151 +ext3_xattr_set2(handle_t *handle, struct inode *inode,
1152 + struct buffer_head *old_bh, struct ext3_xattr_header *header)
1154 + struct super_block *sb = inode->i_sb;
1155 + struct buffer_head *new_bh = NULL;
1159 + new_bh = ext3_xattr_cache_find(inode, header);
1162 + * We found an identical block in the cache.
1163 + * The old block will be released after updating
1166 + ea_bdebug(old_bh, "reusing block %ld",
1167 + new_bh->b_blocknr);
1170 + if (DQUOT_ALLOC_BLOCK(inode, 1))
1173 + error = ext3_journal_get_write_access(handle, new_bh);
1176 + HDR(new_bh)->h_refcount = cpu_to_le32(
1177 + le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
1178 + ea_bdebug(new_bh, "refcount now=%d",
1179 + le32_to_cpu(HDR(new_bh)->h_refcount));
1180 + } else if (old_bh && header == HDR(old_bh)) {
1181 + /* Keep this block. */
1183 + ext3_xattr_cache_insert(new_bh);
1185 + /* We need to allocate a new block */
1187 + int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
1188 + EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb);
1190 + block = ext3_new_block(handle, inode, goal, 0,
1194 + ea_idebug(inode, "creating block %d", block);
1196 + new_bh = sb_getblk(sb, block);
1199 + ext3_free_blocks(handle, inode, block, 1);
1203 + lock_buffer(new_bh);
1204 + error = ext3_journal_get_create_access(handle, new_bh);
1206 + unlock_buffer(new_bh);
1207 + goto getblk_failed;
1209 + memcpy(new_bh->b_data, header, new_bh->b_size);
1210 + set_buffer_uptodate(new_bh);
1211 + unlock_buffer(new_bh);
1212 + ext3_xattr_cache_insert(new_bh);
1214 + ext3_xattr_update_super_block(handle, sb);
1216 + error = ext3_journal_dirty_metadata(handle, new_bh);
1221 + /* Update the inode. */
1222 + EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
1223 + inode->i_ctime = CURRENT_TIME;
1224 + ext3_mark_inode_dirty(handle, inode);
1225 + if (IS_SYNC(inode))
1226 + handle->h_sync = 1;
1229 + if (old_bh && old_bh != new_bh) {
1231 + * If there was an old block, and we are not still using it,
1232 + * we now release the old block.
1234 + unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);
1236 + error = ext3_journal_get_write_access(handle, old_bh);
1239 + if (refcount == 1) {
1240 + /* Free the old block. */
1241 + ea_bdebug(old_bh, "freeing");
1242 + ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
1244 + /* ext3_forget() calls bforget() for us, but we
1245 + let our caller release old_bh, so we need to
1246 + duplicate the handle before. */
1248 + ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
1250 + /* Decrement the refcount only. */
1252 + HDR(old_bh)->h_refcount = cpu_to_le32(refcount);
1253 + DQUOT_FREE_BLOCK(inode, 1);
1254 + ext3_journal_dirty_metadata(handle, old_bh);
1255 + ea_bdebug(old_bh, "refcount now=%d", refcount);
1260 + if (old_bh != new_bh)
1267 + * ext3_xattr_delete_inode()
1269 + * Free extended attribute resources associated with this inode. This
1270 + * is called immediately before an inode is freed.
1273 +ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
1275 + struct buffer_head *bh;
1276 + unsigned int block = EXT3_I(inode)->i_file_acl;
1280 + down(&ext3_xattr_sem);
1282 + bh = sb_bread(inode->i_sb, block);
1284 + ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
1285 + "inode %ld: block %d read error", inode->i_ino, block);
1288 + ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
1289 + if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
1290 + HDR(bh)->h_blocks != cpu_to_le32(1)) {
1291 + ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
1292 + "inode %ld: bad block %d", inode->i_ino, block);
1295 + ext3_journal_get_write_access(handle, bh);
1296 + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
1297 + if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
1298 + ext3_xattr_cache_remove(bh);
1299 + ext3_free_blocks(handle, inode, block, 1);
1300 + ext3_forget(handle, 1, inode, bh, block);
1303 + HDR(bh)->h_refcount = cpu_to_le32(
1304 + le32_to_cpu(HDR(bh)->h_refcount) - 1);
1305 + ext3_journal_dirty_metadata(handle, bh);
1306 + if (IS_SYNC(inode))
1307 + handle->h_sync = 1;
1308 + DQUOT_FREE_BLOCK(inode, 1);
1310 + EXT3_I(inode)->i_file_acl = 0;
1314 + up(&ext3_xattr_sem);
1318 + * ext3_xattr_put_super()
1320 + * This is called when a file system is unmounted.
1323 +ext3_xattr_put_super(struct super_block *sb)
1325 + mb_cache_shrink(ext3_xattr_cache, sb->s_bdev);
1329 + * ext3_xattr_cache_insert()
1331 + * Create a new entry in the extended attribute cache, and insert
1332 + * it unless such an entry is already in the cache.
1334 + * Returns 0, or a negative error number on failure.
1337 +ext3_xattr_cache_insert(struct buffer_head *bh)
1339 + __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
1340 + struct mb_cache_entry *ce;
1343 + ce = mb_cache_entry_alloc(ext3_xattr_cache);
1346 + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
1348 + mb_cache_entry_free(ce);
1349 + if (error == -EBUSY) {
1350 + ea_bdebug(bh, "already in cache (%d cache entries)",
1351 + atomic_read(&ext3_xattr_cache->c_entry_count));
1355 + ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
1356 + atomic_read(&ext3_xattr_cache->c_entry_count));
1357 + mb_cache_entry_release(ce);
1363 + * ext3_xattr_cmp()
1365 + * Compare two extended attribute blocks for equality.
1367 + * Returns 0 if the blocks are equal, 1 if they differ, and
1368 + * a negative error number on errors.
1371 +ext3_xattr_cmp(struct ext3_xattr_header *header1,
1372 + struct ext3_xattr_header *header2)
1374 + struct ext3_xattr_entry *entry1, *entry2;
1376 + entry1 = ENTRY(header1+1);
1377 + entry2 = ENTRY(header2+1);
1378 + while (!IS_LAST_ENTRY(entry1)) {
1379 + if (IS_LAST_ENTRY(entry2))
1381 + if (entry1->e_hash != entry2->e_hash ||
1382 + entry1->e_name_len != entry2->e_name_len ||
1383 + entry1->e_value_size != entry2->e_value_size ||
1384 + memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1386 + if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1388 + if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1389 + (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1390 + le32_to_cpu(entry1->e_value_size)))
1393 + entry1 = EXT3_XATTR_NEXT(entry1);
1394 + entry2 = EXT3_XATTR_NEXT(entry2);
1396 + if (!IS_LAST_ENTRY(entry2))
1402 + * ext3_xattr_cache_find()
1404 + * Find an identical extended attribute block.
1406 + * Returns a pointer to the block found, or NULL if such a block was
1407 + * not found or an error occurred.
1409 +static struct buffer_head *
1410 +ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header)
1412 + __u32 hash = le32_to_cpu(header->h_hash);
1413 + struct mb_cache_entry *ce;
1415 + if (!header->h_hash)
1416 + return NULL; /* never share */
1417 + ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1418 + ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_bdev, hash);
1420 + struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
1423 + ext3_error(inode->i_sb, "ext3_xattr_cache_find",
1424 + "inode %ld: block %ld read error",
1425 + inode->i_ino, (unsigned long) ce->e_block);
1426 + } else if (le32_to_cpu(HDR(bh)->h_refcount) >
1427 + EXT3_XATTR_REFCOUNT_MAX) {
1428 + ea_idebug(inode, "block %ld refcount %d>%d",
1429 + (unsigned long) ce->e_block,
1430 + le32_to_cpu(HDR(bh)->h_refcount),
1431 + EXT3_XATTR_REFCOUNT_MAX);
1432 + } else if (!ext3_xattr_cmp(header, HDR(bh))) {
1433 + ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count)));
1434 + mb_cache_entry_release(ce);
1438 + ce = mb_cache_entry_find_next(ce, 0, inode->i_bdev, hash);
1444 + * ext3_xattr_cache_remove()
1446 + * Remove the cache entry of a block from the cache. Called when a
1447 + * block becomes invalid.
1450 +ext3_xattr_cache_remove(struct buffer_head *bh)
1452 + struct mb_cache_entry *ce;
1454 + ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
1457 + ea_bdebug(bh, "removing (%d cache entries remaining)",
1458 + atomic_read(&ext3_xattr_cache->c_entry_count)-1);
1459 + mb_cache_entry_free(ce);
1461 + ea_bdebug(bh, "no cache entry");
1464 +#define NAME_HASH_SHIFT 5
1465 +#define VALUE_HASH_SHIFT 16
1468 + * ext3_xattr_hash_entry()
1470 + * Compute the hash of an extended attribute.
1472 +static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
1473 + struct ext3_xattr_entry *entry)
1476 + char *name = entry->e_name;
1479 + for (n=0; n < entry->e_name_len; n++) {
1480 + hash = (hash << NAME_HASH_SHIFT) ^
1481 + (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1485 + if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1486 + __u32 *value = (__u32 *)((char *)header +
1487 + le16_to_cpu(entry->e_value_offs));
1488 + for (n = (le32_to_cpu(entry->e_value_size) +
1489 + EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
1490 + hash = (hash << VALUE_HASH_SHIFT) ^
1491 + (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1492 + le32_to_cpu(*value++);
1495 + entry->e_hash = cpu_to_le32(hash);
1498 +#undef NAME_HASH_SHIFT
1499 +#undef VALUE_HASH_SHIFT
1501 +#define BLOCK_HASH_SHIFT 16
1504 + * ext3_xattr_rehash()
1506 + * Re-compute the extended attribute hash value after an entry has changed.
1508 +static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1509 + struct ext3_xattr_entry *entry)
1511 + struct ext3_xattr_entry *here;
1514 + ext3_xattr_hash_entry(header, entry);
1515 + here = ENTRY(header+1);
1516 + while (!IS_LAST_ENTRY(here)) {
1517 + if (!here->e_hash) {
1518 + /* Block is not shared if an entry's hash value == 0 */
1522 + hash = (hash << BLOCK_HASH_SHIFT) ^
1523 + (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1524 + le32_to_cpu(here->e_hash);
1525 + here = EXT3_XATTR_NEXT(here);
1527 + header->h_hash = cpu_to_le32(hash);
1530 +#undef BLOCK_HASH_SHIFT
1533 +init_ext3_xattr(void)
1537 + err = ext3_xattr_register(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler);
1540 + ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
1541 + sizeof(struct mb_cache_entry) +
1542 + sizeof(struct mb_cache_entry_index), 1, 6);
1543 + if (!ext3_xattr_cache) {
1544 + ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler);
1552 +exit_ext3_xattr(void)
1554 + if (ext3_xattr_cache)
1555 + mb_cache_destroy(ext3_xattr_cache);
1556 + ext3_xattr_cache = NULL;
1557 + ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler);
1560 diff -Nru a/fs/ext3/xattr.h b/fs/ext3/xattr.h
1561 --- /dev/null Wed Dec 31 16:00:00 1969
1562 +++ b/fs/ext3/xattr.h Sun Dec 8 02:49:56 2002
1565 + File: fs/ext3/xattr.h
1567 + On-disk format of extended attributes for the ext3 filesystem.
1569 + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
1572 +#include <linux/config.h>
1573 +#include <linux/xattr.h>
1575 +/* Magic value in attribute blocks */
1576 +#define EXT3_XATTR_MAGIC 0xEA020000
1578 +/* Maximum number of references to one attribute block */
1579 +#define EXT3_XATTR_REFCOUNT_MAX 1024
1582 +#define EXT3_XATTR_INDEX_MAX 10
1583 +#define EXT3_XATTR_INDEX_USER 1
1584 +#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2
1585 +#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3
1587 +struct ext3_xattr_header {
1588 + __u32 h_magic; /* magic number for identification */
1589 + __u32 h_refcount; /* reference count */
1590 + __u32 h_blocks; /* number of disk blocks used */
1591 + __u32 h_hash; /* hash value of all attributes */
1592 + __u32 h_reserved[4]; /* zero right now */
1595 +struct ext3_xattr_entry {
1596 + __u8 e_name_len; /* length of name */
1597 + __u8 e_name_index; /* attribute name index */
1598 + __u16 e_value_offs; /* offset in disk block of value */
1599 + __u32 e_value_block; /* disk block attribute is stored on (n/i) */
1600 + __u32 e_value_size; /* size of attribute value */
1601 + __u32 e_hash; /* hash value of name and value */
1602 + char e_name[0]; /* attribute name */
1605 +#define EXT3_XATTR_PAD_BITS 2
1606 +#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS)
1607 +#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1)
1608 +#define EXT3_XATTR_LEN(name_len) \
1609 + (((name_len) + EXT3_XATTR_ROUND + \
1610 + sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
1611 +#define EXT3_XATTR_NEXT(entry) \
1612 + ( (struct ext3_xattr_entry *)( \
1613 + (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
1614 +#define EXT3_XATTR_SIZE(size) \
1615 + (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
1617 +# ifdef CONFIG_EXT3_FS_XATTR
1619 +struct ext3_xattr_handler {
1621 + size_t (*list)(char *list, struct inode *inode, const char *name,
1623 + int (*get)(struct inode *inode, const char *name, void *buffer,
1625 + int (*set)(struct inode *inode, const char *name, const void *buffer,
1626 + size_t size, int flags);
1629 +extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
1630 +extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
1632 +extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int);
1633 +extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
1634 +extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
1635 +extern int ext3_removexattr(struct dentry *, const char *);
1637 +extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
1638 +extern int ext3_xattr_list(struct inode *, char *, size_t);
1639 +extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int);
1641 +extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
1642 +extern void ext3_xattr_put_super(struct super_block *);
1644 +extern int init_ext3_xattr(void);
1645 +extern void exit_ext3_xattr(void);
1647 +# else /* CONFIG_EXT3_FS_XATTR */
1648 +# define ext3_setxattr NULL
1649 +# define ext3_getxattr NULL
1650 +# define ext3_listxattr NULL
1651 +# define ext3_removexattr NULL
1654 +ext3_xattr_get(struct inode *inode, int name_index, const char *name,
1655 + void *buffer, size_t size, int flags)
1657 + return -EOPNOTSUPP;
1661 +ext3_xattr_list(struct inode *inode, void *buffer, size_t size, int flags)
1663 + return -EOPNOTSUPP;
1667 +ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
1668 + const char *name, const void *value, size_t size, int flags)
1670 + return -EOPNOTSUPP;
1674 +ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
1679 +ext3_xattr_put_super(struct super_block *sb)
1684 +init_ext3_xattr(void)
1690 +exit_ext3_xattr(void)
1694 +# endif /* CONFIG_EXT3_FS_XATTR */
1696 +extern struct ext3_xattr_handler ext3_xattr_user_handler;
1697 diff -Nru a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
1698 --- /dev/null Wed Dec 31 16:00:00 1969
1699 +++ b/fs/ext3/xattr_user.c Sun Dec 8 02:49:56 2002
1702 + * linux/fs/ext3/xattr_user.c
1703 + * Handler for extended user attributes.
1705 + * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
1708 +#include <linux/module.h>
1709 +#include <linux/string.h>
1710 +#include <linux/fs.h>
1711 +#include <linux/smp_lock.h>
1712 +#include <linux/ext3_jbd.h>
1713 +#include <linux/ext3_fs.h>
1716 +#ifdef CONFIG_EXT3_FS_POSIX_ACL
1717 +# include <linux/ext3_acl.h>
1720 +#define XATTR_USER_PREFIX "user."
1723 +ext3_xattr_user_list(char *list, struct inode *inode,
1724 + const char *name, int name_len)
1726 + const int prefix_len = sizeof(XATTR_USER_PREFIX)-1;
1728 + if (!test_opt(inode->i_sb, XATTR_USER))
1732 + memcpy(list, XATTR_USER_PREFIX, prefix_len);
1733 + memcpy(list+prefix_len, name, name_len);
1735 + return prefix_len + name_len;
1739 +ext3_xattr_user_get(struct inode *inode, const char *name,
1740 + void *buffer, size_t size)
1744 + if (strcmp(name, "") == 0)
1746 + if (!test_opt(inode->i_sb, XATTR_USER))
1747 + return -EOPNOTSUPP;
1748 +#ifdef CONFIG_EXT3_FS_POSIX_ACL
1749 + error = ext3_permission_locked(inode, MAY_READ);
1751 + error = permission(inode, MAY_READ);
1756 + return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name,
1761 +ext3_xattr_user_set(struct inode *inode, const char *name,
1762 + const void *value, size_t size, int flags)
1767 + if (strcmp(name, "") == 0)
1769 + if (!test_opt(inode->i_sb, XATTR_USER))
1770 + return -EOPNOTSUPP;
1771 + if ( !S_ISREG(inode->i_mode) &&
1772 + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
1774 +#ifdef CONFIG_EXT3_FS_POSIX_ACL
1775 + error = ext3_permission_locked(inode, MAY_WRITE);
1777 + error = permission(inode, MAY_WRITE);
1783 + handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS);
1784 + if (IS_ERR(handle))
1785 + return PTR_ERR(handle);
1786 + error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name,
1787 + value, size, flags);
1788 + ext3_journal_stop(handle, inode);
1794 +struct ext3_xattr_handler ext3_xattr_user_handler = {
1795 + prefix: XATTR_USER_PREFIX,
1796 + list: ext3_xattr_user_list,
1797 + get: ext3_xattr_user_get,
1798 + set: ext3_xattr_user_set,
1800 diff -Nru a/fs/mbcache.c b/fs/mbcache.c
1801 --- /dev/null Wed Dec 31 16:00:00 1969
1802 +++ b/fs/mbcache.c Sun Dec 8 02:49:56 2002
1805 + * linux/fs/mbcache.c
1806 + * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
1810 + * Filesystem Meta Information Block Cache (mbcache)
1812 + * The mbcache caches blocks of block devices that need to be located
1813 + * by their device/block number, as well as by other criteria (such
1814 + * as the block's contents).
1816 + * There can only be one cache entry in a cache per device and block number.
1817 + * Additional indexes need not be unique in this sense. The number of
1818 + * additional indexes (=other criteria) can be hardwired (at compile time)
1819 + * or specified at cache create time.
1821 + * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
1822 + * in the cache. A valid entry is in the main hash tables of the cache,
1823 + * and may also be in the lru list. An invalid entry is not in any hashes
1826 + * A valid cache entry is only in the lru list if no handles refer to it.
1827 + * Invalid cache entries will be freed when the last handle to the cache
1828 + * entry is released.
1831 +#include <linux/kernel.h>
1832 +#include <linux/module.h>
1834 +#include <linux/hash.h>
1835 +#include <linux/fs.h>
1836 +#include <linux/mm.h>
1837 +#include <linux/slab.h>
1838 +#include <linux/sched.h>
1839 +#include <linux/init.h>
1840 +#include <linux/mbcache.h>
1843 +#ifdef MB_CACHE_DEBUG
1844 +# define mb_debug(f...) do { \
1845 + printk(KERN_DEBUG f); \
1848 +#define mb_assert(c) do { if (!(c)) \
1849 + printk(KERN_ERR "assertion " #c " failed\n"); \
1852 +# define mb_debug(f...) do { } while(0)
1853 +# define mb_assert(c) do { } while(0)
1855 +#define mb_error(f...) do { \
1856 + printk(KERN_ERR f); \
1860 +MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
1861 +MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
1862 +MODULE_LICENSE("GPL");
1864 +EXPORT_SYMBOL(mb_cache_create);
1865 +EXPORT_SYMBOL(mb_cache_shrink);
1866 +EXPORT_SYMBOL(mb_cache_destroy);
1867 +EXPORT_SYMBOL(mb_cache_entry_alloc);
1868 +EXPORT_SYMBOL(mb_cache_entry_insert);
1869 +EXPORT_SYMBOL(mb_cache_entry_release);
1870 +EXPORT_SYMBOL(mb_cache_entry_takeout);
1871 +EXPORT_SYMBOL(mb_cache_entry_free);
1872 +EXPORT_SYMBOL(mb_cache_entry_dup);
1873 +EXPORT_SYMBOL(mb_cache_entry_get);
1874 +#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
1875 +EXPORT_SYMBOL(mb_cache_entry_find_first);
1876 +EXPORT_SYMBOL(mb_cache_entry_find_next);
1881 + * Global data: list of all mbcache's, lru list, and a spinlock for
1882 + * accessing cache data structures on SMP machines. (The lru list is
1883 + * global across all mbcaches.)
1886 +static LIST_HEAD(mb_cache_list);
1887 +static LIST_HEAD(mb_cache_lru_list);
1888 +static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED;
1889 +static struct shrinker *mb_shrinker;
1892 +mb_cache_indexes(struct mb_cache *cache)
1894 +#ifdef MB_CACHE_INDEXES_COUNT
1895 + return MB_CACHE_INDEXES_COUNT;
1897 + return cache->c_indexes_count;
1902 + * What the mbcache registers as to get shrunk dynamically.
1905 +static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask);
1908 +__mb_cache_entry_takeout_lru(struct mb_cache_entry *ce)
1910 + if (!list_empty(&ce->e_lru_list))
1911 + list_del_init(&ce->e_lru_list);
1916 +__mb_cache_entry_into_lru(struct mb_cache_entry *ce)
1918 + list_add(&ce->e_lru_list, &mb_cache_lru_list);
1923 +__mb_cache_entry_in_lru(struct mb_cache_entry *ce)
1925 + return (!list_empty(&ce->e_lru_list));
1930 + * Insert the cache entry into all hashes.
1933 +__mb_cache_entry_link(struct mb_cache_entry *ce)
1935 + struct mb_cache *cache = ce->e_cache;
1936 + unsigned int bucket;
1939 + bucket = hash_long((unsigned long)ce->e_bdev +
1940 + (ce->e_block & 0xffffff), cache->c_bucket_bits);
1941 + list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
1942 + for (n=0; n<mb_cache_indexes(cache); n++) {
1943 + bucket = hash_long(ce->e_indexes[n].o_key,
1944 + cache->c_bucket_bits);
1945 + list_add(&ce->e_indexes[n].o_list,
1946 + &cache->c_indexes_hash[n][bucket]);
1952 + * Remove the cache entry from all hashes.
1955 +__mb_cache_entry_unlink(struct mb_cache_entry *ce)
1959 + list_del_init(&ce->e_block_list);
1960 + for (n = 0; n < mb_cache_indexes(ce->e_cache); n++)
1961 + list_del(&ce->e_indexes[n].o_list);
1966 +__mb_cache_entry_is_linked(struct mb_cache_entry *ce)
1968 + return (!list_empty(&ce->e_block_list));
1972 +static inline struct mb_cache_entry *
1973 +__mb_cache_entry_read(struct mb_cache_entry *ce)
1975 + __mb_cache_entry_takeout_lru(ce);
1976 + atomic_inc(&ce->e_used);
1982 +__mb_cache_entry_forget(struct mb_cache_entry *ce)
1984 + struct mb_cache *cache = ce->e_cache;
1986 + mb_assert(atomic_read(&ce->e_used) == 0);
1987 + atomic_dec(&cache->c_entry_count);
1988 + if (cache->c_op.free)
1989 + cache->c_op.free(ce);
1990 + kmem_cache_free(cache->c_entry_cache, ce);
1995 +__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
1997 + if (atomic_dec_and_test(&ce->e_used)) {
1998 + if (!__mb_cache_entry_is_linked(ce))
2000 + __mb_cache_entry_into_lru(ce);
2002 + spin_unlock(&mb_cache_spinlock);
2005 + spin_unlock(&mb_cache_spinlock);
2006 + __mb_cache_entry_forget(ce);
2011 + * mb_cache_shrink_fn() memory pressure callback
2013 + * This function is called by the kernel memory management when memory
2016 + * @nr_to_scan: Number of objects to scan
2017 + * @gfp_mask: (ignored)
2019 + * Returns the number of objects which are present in the cache.
2022 +mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask)
2024 + LIST_HEAD(free_list);
2025 + struct list_head *l;
2028 + spin_lock(&mb_cache_spinlock);
2029 + list_for_each_prev(l, &mb_cache_list) {
2030 + struct mb_cache *cache =
2031 + list_entry(l, struct mb_cache, c_cache_list);
2032 + mb_debug("cache %s (%d)", cache->c_name,
2033 + atomic_read(&cache->c_entry_count));
2034 + count += atomic_read(&cache->c_entry_count);
2036 + mb_debug("trying to free %d entries", nr_to_scan);
2037 + if (nr_to_scan == 0) {
2038 + spin_unlock(&mb_cache_spinlock);
2041 + while (nr_to_scan && !list_empty(&mb_cache_lru_list)) {
2042 + struct mb_cache_entry *ce =
2043 + list_entry(mb_cache_lru_list.prev,
2044 + struct mb_cache_entry, e_lru_list);
2045 + list_move(&ce->e_lru_list, &free_list);
2046 + if (__mb_cache_entry_is_linked(ce))
2047 + __mb_cache_entry_unlink(ce);
2050 + spin_unlock(&mb_cache_spinlock);
2051 + l = free_list.prev;
2052 + while (l != &free_list) {
2053 + struct mb_cache_entry *ce = list_entry(l,
2054 + struct mb_cache_entry, e_lru_list);
2056 + __mb_cache_entry_forget(ce);
2060 + mb_debug("%d remaining entries ", count);
2066 + * mb_cache_create() create a new cache
2068 + * All entries in one cache are equal size. Cache entries may be from
2069 + * multiple devices. If this is the first mbcache created, registers
2070 + * the cache with kernel memory management. Returns NULL if no more
2071 + * memory was available.
2073 + * @name: name of the cache (informal)
2074 + * @cache_op: contains the callback called when freeing a cache entry
2075 + * @entry_size: The size of a cache entry, including
2076 + * struct mb_cache_entry
2077 + * @indexes_count: number of additional indexes in the cache. Must equal
2078 + * MB_CACHE_INDEXES_COUNT if the number of indexes is
2080 + * @bucket_bits: log2(number of hash buckets)
2083 +mb_cache_create(const char *name, struct mb_cache_op *cache_op,
2084 + size_t entry_size, int indexes_count, int bucket_bits)
2086 + int m=0, n, bucket_count = 1 << bucket_bits;
2087 + struct mb_cache *cache = NULL;
2089 + if(entry_size < sizeof(struct mb_cache_entry) +
2090 + indexes_count * sizeof(struct mb_cache_entry_index))
2093 + cache = kmalloc(sizeof(struct mb_cache) +
2094 + indexes_count * sizeof(struct list_head), GFP_KERNEL);
2097 + cache->c_name = name;
2099 + cache->c_op.free = cache_op->free;
2101 + cache->c_op.free = NULL;
2102 + atomic_set(&cache->c_entry_count, 0);
2103 + cache->c_bucket_bits = bucket_bits;
2104 +#ifdef MB_CACHE_INDEXES_COUNT
2105 + mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT);
2107 + cache->c_indexes_count = indexes_count;
2109 + cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
2111 + if (!cache->c_block_hash)
2113 + for (n=0; n<bucket_count; n++)
2114 + INIT_LIST_HEAD(&cache->c_block_hash[n]);
2115 + for (m=0; m<indexes_count; m++) {
2116 + cache->c_indexes_hash[m] = kmalloc(bucket_count *
2117 + sizeof(struct list_head),
2119 + if (!cache->c_indexes_hash[m])
2121 + for (n=0; n<bucket_count; n++)
2122 + INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]);
2124 + cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
2125 + 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL);
2126 + if (!cache->c_entry_cache)
2129 + spin_lock(&mb_cache_spinlock);
2130 + if (list_empty(&mb_cache_list)) {
2131 + if (mb_shrinker) {
2132 + printk(KERN_ERR "%s: already have a shrinker!\n",
2134 + remove_shrinker(mb_shrinker);
2136 + mb_shrinker = set_shrinker(DEFAULT_SEEKS, mb_cache_shrink_fn);
2138 + list_add(&cache->c_cache_list, &mb_cache_list);
2139 + spin_unlock(&mb_cache_spinlock);
2145 + kfree(cache->c_indexes_hash[m]);
2146 + if (cache->c_block_hash)
2147 + kfree(cache->c_block_hash);
2155 + * mb_cache_shrink()
2157 + * Removes all cache entries of a device from the cache. All cache entries
2158 + * currently in use cannot be freed, and thus remain in the cache. All others
2161 + * @cache: which cache to shrink
2162 + * @bdev: which device's cache entries to shrink
2165 +mb_cache_shrink(struct mb_cache *cache, struct block_device *bdev)
2167 + LIST_HEAD(free_list);
2168 + struct list_head *l;
2170 + spin_lock(&mb_cache_spinlock);
2171 + l = mb_cache_lru_list.prev;
2172 + while (l != &mb_cache_lru_list) {
2173 + struct mb_cache_entry *ce =
2174 + list_entry(l, struct mb_cache_entry, e_lru_list);
2176 + if (ce->e_bdev == bdev) {
2177 + list_move(&ce->e_lru_list, &free_list);
2178 + if (__mb_cache_entry_is_linked(ce))
2179 + __mb_cache_entry_unlink(ce);
2182 + spin_unlock(&mb_cache_spinlock);
2183 + l = free_list.prev;
2184 + while (l != &free_list) {
2185 + struct mb_cache_entry *ce =
2186 + list_entry(l, struct mb_cache_entry, e_lru_list);
2188 + __mb_cache_entry_forget(ce);
2194 + * mb_cache_destroy()
2196 + * Shrinks the cache to its minimum possible size (hopefully 0 entries),
2197 + * and then destroys it. If this was the last mbcache, un-registers the
2198 + * mbcache from kernel memory management.
2201 +mb_cache_destroy(struct mb_cache *cache)
2203 + LIST_HEAD(free_list);
2204 + struct list_head *l;
2207 + spin_lock(&mb_cache_spinlock);
2208 + l = mb_cache_lru_list.prev;
2209 + while (l != &mb_cache_lru_list) {
2210 + struct mb_cache_entry *ce =
2211 + list_entry(l, struct mb_cache_entry, e_lru_list);
2213 + if (ce->e_cache == cache) {
2214 + list_move(&ce->e_lru_list, &free_list);
2215 + if (__mb_cache_entry_is_linked(ce))
2216 + __mb_cache_entry_unlink(ce);
2219 + list_del(&cache->c_cache_list);
2220 + if (list_empty(&mb_cache_list) && mb_shrinker) {
2221 + remove_shrinker(mb_shrinker);
2224 + spin_unlock(&mb_cache_spinlock);
2226 + l = free_list.prev;
2227 + while (l != &free_list) {
2228 + struct mb_cache_entry *ce =
2229 + list_entry(l, struct mb_cache_entry, e_lru_list);
2231 + __mb_cache_entry_forget(ce);
2234 + if (atomic_read(&cache->c_entry_count) > 0) {
2235 + mb_error("cache %s: %d orphaned entries",
2237 + atomic_read(&cache->c_entry_count));
2240 + kmem_cache_destroy(cache->c_entry_cache);
2242 + for (n=0; n < mb_cache_indexes(cache); n++)
2243 + kfree(cache->c_indexes_hash[n]);
2244 + kfree(cache->c_block_hash);
2251 + * mb_cache_entry_alloc()
2253 + * Allocates a new cache entry. The new entry will not be valid initially,
2254 + * and thus cannot be looked up yet. It should be filled with data, and
2255 + * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
2256 + * if no more memory was available.
2258 +struct mb_cache_entry *
2259 +mb_cache_entry_alloc(struct mb_cache *cache)
2261 + struct mb_cache_entry *ce;
2263 + atomic_inc(&cache->c_entry_count);
2264 + ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL);
2266 + INIT_LIST_HEAD(&ce->e_lru_list);
2267 + INIT_LIST_HEAD(&ce->e_block_list);
2268 + ce->e_cache = cache;
2269 + atomic_set(&ce->e_used, 1);
2276 + * mb_cache_entry_insert()
2278 + * Inserts an entry that was allocated using mb_cache_entry_alloc() into
2279 + * the cache. After this, the cache entry can be looked up, but is not yet
2280 + * in the lru list as the caller still holds a handle to it. Returns 0 on
2281 + * success, or -EBUSY if a cache entry for that device + inode exists
2282 + * already (this may happen after a failed lookup, but when another process
2283 + * has inserted the same cache entry in the meantime).
2285 + * @bdev: device the cache entry belongs to
2286 + * @block: block number
2287 + * @keys: array of additional keys. There must be indexes_count entries
2288 + * in the array (as specified when creating the cache).
2291 +mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
2292 + sector_t block, unsigned int keys[])
2294 + struct mb_cache *cache = ce->e_cache;
2295 + unsigned int bucket;
2296 + struct list_head *l;
2297 + int error = -EBUSY, n;
2299 + bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
2300 + cache->c_bucket_bits);
2301 + spin_lock(&mb_cache_spinlock);
2302 + list_for_each_prev(l, &cache->c_block_hash[bucket]) {
2303 + struct mb_cache_entry *ce =
2304 + list_entry(l, struct mb_cache_entry, e_block_list);
2305 + if (ce->e_bdev == bdev && ce->e_block == block)
2308 + mb_assert(!__mb_cache_entry_is_linked(ce));
2309 + ce->e_bdev = bdev;
2310 + ce->e_block = block;
2311 + for (n=0; n<mb_cache_indexes(cache); n++)
2312 + ce->e_indexes[n].o_key = keys[n];
2313 + __mb_cache_entry_link(ce);
2315 + spin_unlock(&mb_cache_spinlock);
2321 + * mb_cache_entry_release()
2323 + * Release a handle to a cache entry. When the last handle to a cache entry
2324 + * is released it is either freed (if it is invalid) or otherwise inserted
2325 + * in to the lru list.
2328 +mb_cache_entry_release(struct mb_cache_entry *ce)
2330 + spin_lock(&mb_cache_spinlock);
2331 + __mb_cache_entry_release_unlock(ce);
2336 + * mb_cache_entry_takeout()
2338 + * Take a cache entry out of the cache, making it invalid. The entry can later
2339 + * be re-inserted using mb_cache_entry_insert(), or released using
2340 + * mb_cache_entry_release().
2343 +mb_cache_entry_takeout(struct mb_cache_entry *ce)
2345 + spin_lock(&mb_cache_spinlock);
2346 + mb_assert(!__mb_cache_entry_in_lru(ce));
2347 + if (__mb_cache_entry_is_linked(ce))
2348 + __mb_cache_entry_unlink(ce);
2349 + spin_unlock(&mb_cache_spinlock);
2354 + * mb_cache_entry_free()
2356 + * This is equivalent to the sequence mb_cache_entry_takeout() --
2357 + * mb_cache_entry_release().
2360 +mb_cache_entry_free(struct mb_cache_entry *ce)
2362 + spin_lock(&mb_cache_spinlock);
2363 + mb_assert(!__mb_cache_entry_in_lru(ce));
2364 + if (__mb_cache_entry_is_linked(ce))
2365 + __mb_cache_entry_unlink(ce);
2366 + __mb_cache_entry_release_unlock(ce);
2371 + * mb_cache_entry_dup()
2373 + * Duplicate a handle to a cache entry (does not duplicate the cache entry
2374 + * itself). After the call, both the old and the new handle must be released.
2376 +struct mb_cache_entry *
2377 +mb_cache_entry_dup(struct mb_cache_entry *ce)
2379 + atomic_inc(&ce->e_used);
2385 + * mb_cache_entry_get()
2387 + * Get a cache entry by device / block number. (There can only be one entry
2388 + * in the cache per device and block.) Returns NULL if no such cache entry
2391 +struct mb_cache_entry *
2392 +mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
2395 + unsigned int bucket;
2396 + struct list_head *l;
2397 + struct mb_cache_entry *ce;
2399 + bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
2400 + cache->c_bucket_bits);
2401 + spin_lock(&mb_cache_spinlock);
2402 + list_for_each(l, &cache->c_block_hash[bucket]) {
2403 + ce = list_entry(l, struct mb_cache_entry, e_block_list);
2404 + if (ce->e_bdev == bdev && ce->e_block == block) {
2405 + ce = __mb_cache_entry_read(ce);
2412 + spin_unlock(&mb_cache_spinlock);
2416 +#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
2418 +static struct mb_cache_entry *
2419 +__mb_cache_entry_find(struct list_head *l, struct list_head *head,
2420 + int index, struct block_device *bdev, unsigned int key)
2422 + while (l != head) {
2423 + struct mb_cache_entry *ce =
2424 + list_entry(l, struct mb_cache_entry,
2425 + e_indexes[index].o_list);
2426 + if (ce->e_bdev == bdev &&
2427 + ce->e_indexes[index].o_key == key) {
2428 + ce = __mb_cache_entry_read(ce);
2439 + * mb_cache_entry_find_first()
2441 + * Find the first cache entry on a given device with a certain key in
2442 + * an additional index. Additional matches can be found with
2443 + * mb_cache_entry_find_next(). Returns NULL if no match was found.
2445 + * @cache: the cache to search
2446 + * @index: the number of the additional index to search (0<=index<indexes_count)
2447 + * @bdev: the device the cache entry should belong to
2448 + * @key: the key in the index
2450 +struct mb_cache_entry *
2451 +mb_cache_entry_find_first(struct mb_cache *cache, int index,
2452 + struct block_device *bdev, unsigned int key)
2454 + unsigned int bucket = hash_long(key, cache->c_bucket_bits);
2455 + struct list_head *l;
2456 + struct mb_cache_entry *ce;
2458 + mb_assert(index < mb_cache_indexes(cache));
2459 + spin_lock(&mb_cache_spinlock);
2460 + l = cache->c_indexes_hash[index][bucket].next;
2461 + ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket],
2462 + index, bdev, key);
2463 + spin_unlock(&mb_cache_spinlock);
2469 + * mb_cache_entry_find_next()
2471 + * Find the next cache entry on a given device with a certain key in an
2472 + * additional index. Returns NULL if no match could be found. The previous
2473 + * entry is automatically released, so that mb_cache_entry_find_next() can
2474 + * be called like this:
2476 + * entry = mb_cache_entry_find_first();
2479 + * entry = mb_cache_entry_find_next(entry, ...);
2482 + * @prev: The previous match
2483 + * @index: the number of the additional index to search (0<=index<indexes_count)
2484 + * @bdev: the device the cache entry should belong to
2485 + * @key: the key in the index
2487 +struct mb_cache_entry *
2488 +mb_cache_entry_find_next(struct mb_cache_entry *prev, int index,
2489 + struct block_device *bdev, unsigned int key)
2491 + struct mb_cache *cache = prev->e_cache;
2492 + unsigned int bucket = hash_long(key, cache->c_bucket_bits);
2493 + struct list_head *l;
2494 + struct mb_cache_entry *ce;
2496 + mb_assert(index < mb_cache_indexes(cache));
2497 + spin_lock(&mb_cache_spinlock);
2498 + l = prev->e_indexes[index].o_list.next;
2499 + ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket],
2500 + index, bdev, key);
2501 + __mb_cache_entry_release_unlock(prev);
2505 +#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
2506 diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
2507 --- a/include/linux/ext3_fs.h Sun Dec 8 02:49:56 2002
2508 +++ b/include/linux/ext3_fs.h Sun Dec 8 02:49:56 2002
2511 #define EXT3_BAD_INO 1 /* Bad blocks inode */
2512 #define EXT3_ROOT_INO 2 /* Root inode */
2513 -#define EXT3_ACL_IDX_INO 3 /* ACL inode */
2514 -#define EXT3_ACL_DATA_INO 4 /* ACL inode */
2515 #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
2516 #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
2517 #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
2520 # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
2522 -#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry))
2523 #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
2525 # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
2526 @@ -130,28 +127,6 @@
2532 -struct ext3_acl_header /* Header of Access Control Lists */
2535 - __u32 aclh_file_count;
2536 - __u32 aclh_acle_count;
2537 - __u32 aclh_first_acle;
2540 -struct ext3_acl_entry /* Access Control List Entry */
2543 - __u16 acle_perms; /* Access permissions */
2544 - __u16 acle_type; /* Type of entry */
2545 - __u16 acle_tag; /* User or group identity */
2547 - __u32 acle_next; /* Pointer on next entry for the */
2548 - /* same inode or on next free entry */
2552 * Structure of a blocks group descriptor
2554 struct ext3_group_desc
2556 #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
2557 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
2558 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
2559 +#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
2561 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
2562 #ifndef _LINUX_EXT2_FS_H
2564 #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
2565 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
2567 -#define EXT3_FEATURE_COMPAT_SUPP 0
2568 +#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
2569 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
2570 EXT3_FEATURE_INCOMPAT_RECOVER)
2571 #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
2576 +extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
2577 extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
2578 extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
2580 @@ -781,8 +758,10 @@
2583 extern struct inode_operations ext3_dir_inode_operations;
2584 +extern struct inode_operations ext3_special_inode_operations;
2587 +extern struct inode_operations ext3_symlink_inode_operations;
2588 extern struct inode_operations ext3_fast_symlink_inode_operations;
2591 diff -Nru a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
2592 --- a/include/linux/ext3_jbd.h Sun Dec 8 02:49:56 2002
2593 +++ b/include/linux/ext3_jbd.h Sun Dec 8 02:49:56 2002
2596 #define EXT3_SINGLEDATA_TRANS_BLOCKS 8
2598 +/* Extended attributes may touch two data buffers, two bitmap buffers,
2599 + * and two group and summaries. */
2601 +#define EXT3_XATTR_TRANS_BLOCKS 8
2603 /* Define the minimum size for a transaction which modifies data. This
2604 * needs to take into account the fact that we may end up modifying two
2605 * quota files too (one for the group, one for the user quota). The
2606 * superblock only gets updated once, of course, so don't bother
2607 * counting that again for the quota updates. */
2609 -#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2)
2610 +#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \
2611 + EXT3_XATTR_TRANS_BLOCKS - 2)
2613 extern int ext3_writepage_trans_blocks(struct inode *inode);
2615 diff -Nru a/include/linux/mbcache.h b/include/linux/mbcache.h
2616 --- /dev/null Wed Dec 31 16:00:00 1969
2617 +++ b/include/linux/mbcache.h Sun Dec 8 02:49:56 2002
2620 + File: linux/mbcache.h
2622 + (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
2625 +/* Hardwire the number of additional indexes */
2626 +#define MB_CACHE_INDEXES_COUNT 1
2628 +struct mb_cache_entry;
2630 +struct mb_cache_op {
2631 + void (*free)(struct mb_cache_entry *);
2635 + struct list_head c_cache_list;
2636 + const char *c_name;
2637 + struct mb_cache_op c_op;
2638 + atomic_t c_entry_count;
2639 + int c_bucket_bits;
2640 +#ifndef MB_CACHE_INDEXES_COUNT
2641 + int c_indexes_count;
2643 + kmem_cache_t *c_entry_cache;
2644 + struct list_head *c_block_hash;
2645 + struct list_head *c_indexes_hash[0];
2648 +struct mb_cache_entry_index {
2649 + struct list_head o_list;
2650 + unsigned int o_key;
2653 +struct mb_cache_entry {
2654 + struct list_head e_lru_list;
2655 + struct mb_cache *e_cache;
2657 + struct block_device *e_bdev;
2659 + struct list_head e_block_list;
2660 + struct mb_cache_entry_index e_indexes[0];
2663 +/* Functions on caches */
2665 +struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t,
2667 +void mb_cache_shrink(struct mb_cache *, struct block_device *);
2668 +void mb_cache_destroy(struct mb_cache *);
2670 +/* Functions on cache entries */
2672 +struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *);
2673 +int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *,
2674 + sector_t, unsigned int[]);
2675 +void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]);
2676 +void mb_cache_entry_release(struct mb_cache_entry *);
2677 +void mb_cache_entry_takeout(struct mb_cache_entry *);
2678 +void mb_cache_entry_free(struct mb_cache_entry *);
2679 +struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *);
2680 +struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *,
2681 + struct block_device *,
2683 +#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
2684 +struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int,
2685 + struct block_device *,
2687 +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
2688 + struct block_device *,