From 169ae4453906f15ee0ad4e1787b8d4676a5b2ccf Mon Sep 17 00:00:00 2001 From: rread Date: Fri, 31 Oct 2003 06:53:23 +0000 Subject: [PATCH] merge devel zcfg --- lnet/include/lnet/socklnd.h | 13 + lustre/doc/lctl.8 | 307 ++ lustre/doc/lfs.1 | 53 + lustre/doc/lwizard.1 | 84 + .../kernel_patches/patches/ext3-htree-suse.patch | 2553 ++++++++++ .../patches/ext3-inode-reuse-2.4.18.patch | 350 ++ .../patches/ext3-inode-reuse-2.4.20.patch | 352 ++ .../patches/ext3-inode-reuse-2.4.22.patch | 187 + .../patches/ext3-orphan_lock-suse.patch | 81 + .../patches/linux-2.4.21-xattr-0.8.54-suse.patch | 5349 ++++++++++++++++++++ .../patches/netconsole-2.4.20-rh.patch | 406 ++ lustre/kernel_patches/series/suse-2.4.21 | 34 + lustre/portals/include/portals/socknal.h | 13 + lustre/utils/wirehdr.c | 6 +- lustre/utils/wiretest.c | 672 +++ 15 files changed, 10458 insertions(+), 2 deletions(-) create mode 100644 lnet/include/lnet/socklnd.h create mode 100644 lustre/doc/lctl.8 create mode 100644 lustre/doc/lfs.1 create mode 100644 lustre/doc/lwizard.1 create mode 100644 lustre/kernel_patches/patches/ext3-htree-suse.patch create mode 100644 lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch create mode 100644 lustre/kernel_patches/patches/ext3-inode-reuse-2.4.20.patch create mode 100644 lustre/kernel_patches/patches/ext3-inode-reuse-2.4.22.patch create mode 100644 lustre/kernel_patches/patches/ext3-orphan_lock-suse.patch create mode 100644 lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch create mode 100644 lustre/kernel_patches/patches/netconsole-2.4.20-rh.patch create mode 100644 lustre/kernel_patches/series/suse-2.4.21 create mode 100644 lustre/portals/include/portals/socknal.h create mode 100644 lustre/utils/wiretest.c diff --git a/lnet/include/lnet/socklnd.h b/lnet/include/lnet/socklnd.h new file mode 100644 index 0000000..6d75e5f --- /dev/null +++ b/lnet/include/lnet/socklnd.h @@ -0,0 +1,13 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * 
vim:expandtab:shiftwidth=8:tabstop=8: + * + * + * + * #defines shared between socknal implementation and utilities + */ + +#define SOCKNAL_CONN_ANY 0 +#define SOCKNAL_CONN_CONTROL 1 +#define SOCKNAL_CONN_BULK_IN 2 +#define SOCKNAL_CONN_BULK_OUT 3 +#define SOCKNAL_CONN_NTYPES 4 diff --git a/lustre/doc/lctl.8 b/lustre/doc/lctl.8 new file mode 100644 index 0000000..1e112dd --- /dev/null +++ b/lustre/doc/lctl.8 @@ -0,0 +1,307 @@ +.TH lctl 8 "2003 Oct 8" Lustre "configuration utilities" +.SH NAME +lctl \- Low level Lustre filesystem configuration utility +.SH SYNOPSIS +.br +.B lctl +.br +.B lctl --device +.br +.B lctl --threads +.br +.SH DESCRIPTION +.B lctl +can be invoked in interactive mode by issuing the lctl command. After that, commands are issued as below. The most common commands in lctl are (in matching pairs) +.B device +and +.B attach +, +.B detach +and +.B setup +, +.B cleanup +and +.B connect +, +.B disconnect +and +.B help +, and +.B quit. + +To get a complete listing of available commands, type help at the lctl prompt. To get basic help on the meaning and syntax of a command, type help command. Command completion is activated with the TAB key, and command history is available via the up- and down-arrow keys. + +For non-interactive single-threaded use, one uses the second invocation, which runs command after connecting to the device. + +.B Network Configuration +.TP +network +Indicate what kind of network applies for the configuration commands that follow. +.TP +connect [[ ] | ] +This will establish a connection to a remote network id given by the hostname/port combination, or the elan id. +.TP +disconnect +Disconnect from a remote nid. +.TP +mynid [nid] +Informs the socknal of the local nid. It defaults to hostname for tcp networks and is automatically setup for elan/myrinet networks. +.TP +add_uuid +Associate a given UUID with an nid. +.TP +close_uuid +Disconnect a UUID. +.TP +del_uuid +Delete a UUID association. 
+.TP +add_route [target] +Add an entry to the routing table for the given target. +.TP +del_route +Delete an entry for the target from the routing table. +.TP +route_list +Print the complete routing table. +.TP +recv_mem [size] +Set the socket receive buffer size; if the size is omitted, the default size for the buffer is printed. +.TP +send_mem [size] +Set send buffer size for the socket; if size is omitted, the default size for the buffer is printed. +.TP +nagle [on/off] +Enable/disable nagle; omitting the argument will cause the default value to be printed. +.TP +fail nid|all [count] +Fail/restore communications. Omitting the count implies failing indefinitely; a count of zero indicates that communication should be restored. A non-zero count indicates the number of portals messages to be dropped after which the communication is restored. +.PP +.B Device Selection +.TP +newdev +Create a new device. +.TP +name2dev +This command can be used to determine a device number for the given device name. +.TP +device +This will select the specified OBD device. All other commands depend on the device being set. +.TP +device_list +Show all the devices. +.TP +lustre_build_version +Print the Lustre build version. +.PP +.B Device Configuration +.TP +attach type [name [uuid]] +Attach a type to the current device (which you need to set using the device command) and give that device a name and UUID. This allows us to identify the device for use later, and also tells us what type of device we will have. +.TP +setup +Type specific device setup commands. For obdfilter, a setup command tells the driver which block device it should use for storage and what type of filesystem is on that device. +.TP +cleanup +Cleanup a previously setup device. +.TP +detach +Remove driver (and name and UUID) from the current device. +.TP +lov_setconfig lov-uuid stripe-count default-stripe-size offset pattern UUID1 [UUID2...] +Write LOV configuration to an MDS device. 
+.TP +lov_getconfig lov-uuid +Read LOV configuration from an MDS device. Returns default-stripe-count, default-stripe-size, offset, pattern, and a list of OST UUIDs. +.PP +.B Device Operations +.TP +probe [timeout] +Build a connection handle to a device. This command is used to suspend configuration until the lctl command has ensured that the MDS and OSC services are available. This is to avoid mount failures in a rebooting cluster. +.TP +close +Close the connection handle +.TP +getattr +Get attributes for an OST object . +.TP +setattr +Set mode attribute for OST object . +.TP +create [num [mode [verbose]]] +Create the specified number of OST objects with the given . +.TP +destroy +Starting at , destroy number of objects starting from the object with object id . +.TP +test_getattr [verbose [[t]objid]] +Do getattrs on OST object (objectid+1 on each thread). +.TP +test_brw [t] [write [verbose [npages [[t]objid]]]] +Do bulk read/writes on OST object ( per I/O). +.TP +test_ldlm +Perform lock manager test. +.TP +ldlm_regress_start %s [numthreads [refheld [numres [numext]]]] +Start lock manager stress test. +.TP +ldlm_regress_stop +Stop lock manager stress test. +.TP +dump_ldlm +Dump all lock manager state, this is very useful for debugging +.TP +activate +Activate an import +.TP +deactivate +De-activate an import +.TP +recover +.TP +lookup +.TP +notransno +Disable sending of committed transnumber updates +.TP +readonly +Disable writes to the underlying device +.TP +abort_recovery +Abort recovery on MDS device +.TP +mount_option +Dump mount options to a file +.TP +get_stripe +Show stripe info for an echo client object. +.TP +set_stripe [ width!count[@offset] [:id:id....] +Set stripe info for an echo client +.TP +unset_stripe +Unset stripe info for an echo client object. +.PP +.B Debug +.TP +debug_daemon +Debug daemon control and dump to a file +.TP +debug_kernel [file] [raw] +Get debug buffer and dump to a file. 
+.TP +debug_file [output] [raw] +Read debug buffer from input and dump to output. +.TP +clear +Clear kernel debug buffer. +.TP +mark +Insert marker text in kernel debug buffer. +.TP +filter +Filter message type from the kernel debug buffer. +.TP +show +Show specific type of messages. +.TP +debug_list +List all the subsystem and debug types. +.TP +panic +Force the kernel to panic. +.PP +.B Control +.TP +help +Show a complete list of commands; help can be used to get help on a specific command. +.TP +exit +Close the lctl session. +.TP +quit +Close the lctl session. + +.SH OPTIONS +The following options can be used to invoke lctl. +.TP +.B --device +The device number to be used for the operation. The value of devno is an integer, normally found by calling lctl name2dev on a device name. +.TP +.B --threads +How many threads should be forked doing the command specified. The numthreads variable is a strictly positive integer indicating how many threads should be started. The devno option is used as above. +.TP +.B --ignore_errors | ignore_errors +Ignore errors during script processing +.TP +.B dump +Save ioctls to a file +.SH EXAMPLES +.B attach + +# lctl +.br +lctl > newdev +.br +lctl > attach obdfilter OBDDEV OBDUUID + +.B connect + +lctl > name2dev OSCDEV 2 +.br +lctl > device 2 +.br +lctl > connect + +.B getattr + +lctl > getattr 12 +.br +id: 12 +.br +grp: 0 +.br +atime: 1002663714 +.br +mtime: 1002663535 +.br +ctime: 1002663535 +.br +size: 10 +.br +blocks: 8 +.br +blksize: 4096 +.br +mode: 100644 +.br +uid: 0 +.br +gid: 0 +.br +flags: 0 +.br +obdflags: 0 +.br +nlink: 1 +.br +valid: ffffffff +.br +inline: +.br +obdmd: +.br +lctl > disconnect +.br +Finished (success) + +.B setup + +lctl > setup /dev/loop0 extN +.br +lctl > quit + +.SH BUGS +None are known. 
diff --git a/lustre/doc/lfs.1 b/lustre/doc/lfs.1 new file mode 100644 index 0000000..5e676f2 --- /dev/null +++ b/lustre/doc/lfs.1 @@ -0,0 +1,53 @@ +.TH lfs 1 "2003 Oct 29" Lustre "configuration utilities" +.SH NAME +lfs \- Lustre utility to create a file with specific striping pattern, find the striping pattern of existing files +.SH SYNOPSIS +.br +.B lfs +.br +.B lfs find [--obd ] [--quiet | --verbose] [--recursive] +.br +.B lfs getstripe +.br +.B lfs setstripe +.SH DESCRIPTION +.B lfs +can be used to create a new file with a specific striping pattern, determine the default striping pattern, gather the extended attributes (object numbers and +location) for a specific file. It can be invoked interactively without any +arguments or in a non-interactive mode with one of the arguments supported. +.SH OPTIONS +The various options supported by lfs are listed and explained below: +.TP +.B setstripe +To create a new file with a specific striping pattern +.TP +.B find +To list the extended attributes for a given filename or files in a directory or recursively for all files in a directory tree. It can also be used to list the files that have objects on a specific OST. +.TP +.B getstripe +To list the striping pattern for given filename +.TP +.B help +Provides brief help on the various arguments +.TP +.B exit/quit +Quit the interactive lfs session + +.SH EXAMPLES +.TP +.B $lfs setstripe /mnt/lustre/file1 131072 0 1 +This creates a file striped on one OST +.TP +.B $lfs find /mnt/lustre/file1 +Lists the extended attributes of a given file +.TP +.B $lfs find /mnt/lustre/ +Lists the extended attributes of all files in a given directory +.TP +.B $lfs find -r /mnt/lustre/ +Recursively list the extended attributes of all files in a given directory tree +.TP +.B $lfs find -r --obd OST2-UUID /mnt/lustre/ +List all the files that have objects on a specific OST +.SH BUGS +None are known. 
diff --git a/lustre/doc/lwizard.1 b/lustre/doc/lwizard.1 new file mode 100644 index 0000000..285cfdd --- /dev/null +++ b/lustre/doc/lwizard.1 @@ -0,0 +1,84 @@ +.TH lwizard 1 "2003 Oct 29" Lustre "Configuration utilities" +.SH NAME +lwizard \- Lustre configuration wizard +.SH SYNOPSIS +.br +.B lwizard +.br +.B lwizard [--help] +.br +.BR lwizard [-o|--file=CONFIG_FILE][--stripe_size=SIZE][--stripe_cnt=COUNT] +.SH DESCRIPTION +The configuration files for Lustre installation are generally created through a series of lmc commands, this generates an XML file which describes the complete cluster. The lwizard eliminates the need to learn lmc to generate configuration files, instead it achieves the same through asking some simple questions. The +XML configuration file generated using lwizard will still have to be made accessible to all the cluster nodes either by storing it on an LDAP server, NFS or by copying it over to all the involved nodes and then running lconf on all nodes to start up the various Lustre services, device setups or mounting the filesystem. +So, once invoked, lwizard asks a series of questions about the various pieces of the cluster : +.TP +.B MDS hostname +.TP +.B MDS device information +.TP +.B OST hostname +This will be asked for every new OST added +.TP +.B OST device information +This will be asked for every new OST added +.TP +.B Lustre mount-point +This is the Lustre mount-point on the client (default - /mnt/lustre) + +The wizard saves the XML file to the filename specified using the -o or --file option or the default file config.xml. It will also save the lmc commands used to create the XML file in a script config.sh or .sh. + +The lwizard tool currently assumes the following defaults: + +.TP +.B Network type +tcp +.TP +.B Filesystem type +ext3 +.TP +.B LMC path +.I /usr/sbin/lmc + +.SH EXAMPLES +The example below shows a sample session using lwizard. 
+.PP +[username@meghna utils]$ ./lwizard --stripe_size=64 --stripe_cnt=2 +.br +This script will help you create a Lustre configuration file. +.br +Creating mds "mds1"... +.br +Please enter the hostname(s) for mds1: meghna +.br +Please enter the device name or loop file name for meghna: /tmp/mds1 +.br +Please enter the device size or 0 to use entire device:5000 +.br +Creating ost "ost1"... +.br +Please enter the hostname(s) for ost1: meghna +.br +Please enter the device name or loop file name for meghna: /tmp/ost1 +.br +Please enter the device size or 0 to use entire device:10000 +.br +Creating ost "ost2"... +.br +Please enter the hostname(s) for ost2: +.br +Please enter the clients' mountpoint (/mnt/lustre): +.br +Creating mds "mds2"... +.br +Please enter the hostname(s) for mds2: +.br + mds1 lov1 ost1 client +.br +Saving configuration to config.xml: +.br +Your configuration has been saved to config.xml. +.br +Your config command has been save to config.sh. +.SH BUGS +None are known. diff --git a/lustre/kernel_patches/patches/ext3-htree-suse.patch b/lustre/kernel_patches/patches/ext3-htree-suse.patch new file mode 100644 index 0000000..ea55ad1 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-htree-suse.patch @@ -0,0 +1,2553 @@ + fs/ext3/Makefile | 2 + fs/ext3/dir.c | 302 +++++++++ + fs/ext3/file.c | 3 + fs/ext3/hash.c | 215 ++++++ + fs/ext3/namei.c | 1420 ++++++++++++++++++++++++++++++++++++++++----- + fs/ext3/super.c | 7 + include/linux/ext3_fs.h | 85 ++ + include/linux/ext3_fs_sb.h | 2 + include/linux/ext3_jbd.h | 2 + include/linux/rbtree.h | 2 + lib/rbtree.c | 42 + + 11 files changed, 1921 insertions(+), 161 deletions(-) + +Index: linux-2.4.21-suse/fs/ext3/dir.c +=================================================================== +--- linux-2.4.21-suse.orig/fs/ext3/dir.c 2001-11-10 01:25:04.000000000 +0300 ++++ linux-2.4.21-suse/fs/ext3/dir.c 2003-10-29 23:17:20.000000000 +0300 +@@ -21,12 +21,16 @@ + #include + #include + #include ++#include ++#include + + 
static unsigned char ext3_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK + }; + + static int ext3_readdir(struct file *, void *, filldir_t); ++static int ext3_dx_readdir(struct file * filp, ++ void * dirent, filldir_t filldir); + + struct file_operations ext3_dir_operations = { + read: generic_read_dir, +@@ -35,6 +39,17 @@ + fsync: ext3_sync_file, /* BKL held */ + }; + ++ ++static unsigned char get_dtype(struct super_block *sb, int filetype) ++{ ++ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || ++ (filetype >= EXT3_FT_MAX)) ++ return DT_UNKNOWN; ++ ++ return (ext3_filetype_table[filetype]); ++} ++ ++ + int ext3_check_dir_entry (const char * function, struct inode * dir, + struct ext3_dir_entry_2 * de, + struct buffer_head * bh, +@@ -79,6 +94,16 @@ + + sb = inode->i_sb; + ++ if (is_dx(inode)) { ++ err = ext3_dx_readdir(filp, dirent, filldir); ++ if (err != ERR_BAD_DX_DIR) ++ return err; ++ /* ++ * We don't set the inode dirty flag since it's not ++ * critical that it get flushed back to the disk. ++ */ ++ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; ++ } + stored = 0; + bh = NULL; + offset = filp->f_pos & (sb->s_blocksize - 1); +@@ -162,18 +187,12 @@ + * during the copy operation. + */ + unsigned long version = filp->f_version; +- unsigned char d_type = DT_UNKNOWN; + +- if (EXT3_HAS_INCOMPAT_FEATURE(sb, +- EXT3_FEATURE_INCOMPAT_FILETYPE) +- && de->file_type < EXT3_FT_MAX) +- d_type = +- ext3_filetype_table[de->file_type]; + error = filldir(dirent, de->name, + de->name_len, + filp->f_pos, + le32_to_cpu(de->inode), +- d_type); ++ get_dtype(sb, de->file_type)); + if (error) + break; + if (version != filp->f_version) +@@ -188,3 +207,272 @@ + UPDATE_ATIME(inode); + return 0; + } ++ ++#ifdef CONFIG_EXT3_INDEX ++/* ++ * These functions convert from the major/minor hash to an f_pos ++ * value. ++ * ++ * Currently we only use major hash numer. 
This is unfortunate, but ++ * on 32-bit machines, the same VFS interface is used for lseek and ++ * llseek, so if we use the 64 bit offset, then the 32-bit versions of ++ * lseek/telldir/seekdir will blow out spectacularly, and from within ++ * the ext2 low-level routine, we don't know if we're being called by ++ * a 64-bit version of the system call or the 32-bit version of the ++ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir ++ * cookie. Sigh. ++ */ ++#define hash2pos(major, minor) (major >> 1) ++#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) ++#define pos2min_hash(pos) (0) ++ ++/* ++ * This structure holds the nodes of the red-black tree used to store ++ * the directory entry in hash order. ++ */ ++struct fname { ++ __u32 hash; ++ __u32 minor_hash; ++ rb_node_t rb_hash; ++ struct fname *next; ++ __u32 inode; ++ __u8 name_len; ++ __u8 file_type; ++ char name[0]; ++}; ++ ++/* ++ * This functoin implements a non-recursive way of freeing all of the ++ * nodes in the red-black tree. ++ */ ++static void free_rb_tree_fname(rb_root_t *root) ++{ ++ rb_node_t *n = root->rb_node; ++ rb_node_t *parent; ++ struct fname *fname; ++ ++ while (n) { ++ /* Do the node's children first */ ++ if ((n)->rb_left) { ++ n = n->rb_left; ++ continue; ++ } ++ if (n->rb_right) { ++ n = n->rb_right; ++ continue; ++ } ++ /* ++ * The node has no children; free it, and then zero ++ * out parent's link to it. Finally go to the ++ * beginning of the loop and try to free the parent ++ * node. 
++ */ ++ parent = n->rb_parent; ++ fname = rb_entry(n, struct fname, rb_hash); ++ kfree(fname); ++ if (!parent) ++ root->rb_node = 0; ++ else if (parent->rb_left == n) ++ parent->rb_left = 0; ++ else if (parent->rb_right == n) ++ parent->rb_right = 0; ++ n = parent; ++ } ++ root->rb_node = 0; ++} ++ ++ ++struct dir_private_info *create_dir_info(loff_t pos) ++{ ++ struct dir_private_info *p; ++ ++ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); ++ if (!p) ++ return NULL; ++ p->root.rb_node = 0; ++ p->curr_node = 0; ++ p->extra_fname = 0; ++ p->last_pos = 0; ++ p->curr_hash = pos2maj_hash(pos); ++ p->curr_minor_hash = pos2min_hash(pos); ++ p->next_hash = 0; ++ return p; ++} ++ ++void ext3_htree_free_dir_info(struct dir_private_info *p) ++{ ++ free_rb_tree_fname(&p->root); ++ kfree(p); ++} ++ ++/* ++ * Given a directory entry, enter it into the fname rb tree. ++ */ ++int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, ++ __u32 minor_hash, ++ struct ext3_dir_entry_2 *dirent) ++{ ++ rb_node_t **p, *parent = NULL; ++ struct fname * fname, *new_fn; ++ struct dir_private_info *info; ++ int len; ++ ++ info = (struct dir_private_info *) dir_file->private_data; ++ p = &info->root.rb_node; ++ ++ /* Create and allocate the fname structure */ ++ len = sizeof(struct fname) + dirent->name_len + 1; ++ new_fn = kmalloc(len, GFP_KERNEL); ++ if (!new_fn) ++ return -ENOMEM; ++ memset(new_fn, 0, len); ++ new_fn->hash = hash; ++ new_fn->minor_hash = minor_hash; ++ new_fn->inode = le32_to_cpu(dirent->inode); ++ new_fn->name_len = dirent->name_len; ++ new_fn->file_type = dirent->file_type; ++ memcpy(new_fn->name, dirent->name, dirent->name_len); ++ new_fn->name[dirent->name_len] = 0; ++ ++ while (*p) { ++ parent = *p; ++ fname = rb_entry(parent, struct fname, rb_hash); ++ ++ /* ++ * If the hash and minor hash match up, then we put ++ * them on a linked list. This rarely happens... 
++ */ ++ if ((new_fn->hash == fname->hash) && ++ (new_fn->minor_hash == fname->minor_hash)) { ++ new_fn->next = fname->next; ++ fname->next = new_fn; ++ return 0; ++ } ++ ++ if (new_fn->hash < fname->hash) ++ p = &(*p)->rb_left; ++ else if (new_fn->hash > fname->hash) ++ p = &(*p)->rb_right; ++ else if (new_fn->minor_hash < fname->minor_hash) ++ p = &(*p)->rb_left; ++ else /* if (new_fn->minor_hash > fname->minor_hash) */ ++ p = &(*p)->rb_right; ++ } ++ ++ rb_link_node(&new_fn->rb_hash, parent, p); ++ rb_insert_color(&new_fn->rb_hash, &info->root); ++ return 0; ++} ++ ++ ++ ++/* ++ * This is a helper function for ext3_dx_readdir. It calls filldir ++ * for all entres on the fname linked list. (Normally there is only ++ * one entry on the linked list, unless there are 62 bit hash collisions.) ++ */ ++static int call_filldir(struct file * filp, void * dirent, ++ filldir_t filldir, struct fname *fname) ++{ ++ struct dir_private_info *info = filp->private_data; ++ loff_t curr_pos; ++ struct inode *inode = filp->f_dentry->d_inode; ++ struct super_block * sb; ++ int error; ++ ++ sb = inode->i_sb; ++ ++ if (!fname) { ++ printk("call_filldir: called with null fname?!?\n"); ++ return 0; ++ } ++ curr_pos = hash2pos(fname->hash, fname->minor_hash); ++ while (fname) { ++ error = filldir(dirent, fname->name, ++ fname->name_len, curr_pos, ++ fname->inode, ++ get_dtype(sb, fname->file_type)); ++ if (error) { ++ filp->f_pos = curr_pos; ++ info->extra_fname = fname->next; ++ return error; ++ } ++ fname = fname->next; ++ } ++ return 0; ++} ++ ++static int ext3_dx_readdir(struct file * filp, ++ void * dirent, filldir_t filldir) ++{ ++ struct dir_private_info *info = filp->private_data; ++ struct inode *inode = filp->f_dentry->d_inode; ++ struct fname *fname; ++ int ret; ++ ++ if (!info) { ++ info = create_dir_info(filp->f_pos); ++ if (!info) ++ return -ENOMEM; ++ filp->private_data = info; ++ } ++ ++ /* Some one has messed with f_pos; reset the world */ ++ if (info->last_pos != 
filp->f_pos) { ++ free_rb_tree_fname(&info->root); ++ info->curr_node = 0; ++ info->extra_fname = 0; ++ info->curr_hash = pos2maj_hash(filp->f_pos); ++ info->curr_minor_hash = pos2min_hash(filp->f_pos); ++ } ++ ++ /* ++ * If there are any leftover names on the hash collision ++ * chain, return them first. ++ */ ++ if (info->extra_fname && ++ call_filldir(filp, dirent, filldir, info->extra_fname)) ++ goto finished; ++ ++ if (!info->curr_node) ++ info->curr_node = rb_get_first(&info->root); ++ ++ while (1) { ++ /* ++ * Fill the rbtree if we have no more entries, ++ * or the inode has changed since we last read in the ++ * cached entries. ++ */ ++ if ((!info->curr_node) || ++ (filp->f_version != inode->i_version)) { ++ info->curr_node = 0; ++ free_rb_tree_fname(&info->root); ++ filp->f_version = inode->i_version; ++ ret = ext3_htree_fill_tree(filp, info->curr_hash, ++ info->curr_minor_hash, ++ &info->next_hash); ++ if (ret < 0) ++ return ret; ++ if (ret == 0) ++ break; ++ info->curr_node = rb_get_first(&info->root); ++ } ++ ++ fname = rb_entry(info->curr_node, struct fname, rb_hash); ++ info->curr_hash = fname->hash; ++ info->curr_minor_hash = fname->minor_hash; ++ if (call_filldir(filp, dirent, filldir, fname)) ++ break; ++ ++ info->curr_node = rb_get_next(info->curr_node); ++ if (!info->curr_node) { ++ info->curr_hash = info->next_hash; ++ info->curr_minor_hash = 0; ++ } ++ } ++finished: ++ info->last_pos = filp->f_pos; ++ UPDATE_ATIME(inode); ++ return 0; ++} ++#endif +Index: linux-2.4.21-suse/fs/ext3/file.c +=================================================================== +--- linux-2.4.21-suse.orig/fs/ext3/file.c 2002-11-29 02:53:15.000000000 +0300 ++++ linux-2.4.21-suse/fs/ext3/file.c 2003-10-29 23:17:20.000000000 +0300 +@@ -35,6 +35,9 @@ + { + if (filp->f_mode & FMODE_WRITE) + ext3_discard_prealloc (inode); ++ if (is_dx(inode) && filp->private_data) ++ ext3_htree_free_dir_info(filp->private_data); ++ + return 0; + } + +Index: linux-2.4.21-suse/fs/ext3/hash.c 
+=================================================================== +--- linux-2.4.21-suse.orig/fs/ext3/hash.c 2003-10-29 23:17:20.000000000 +0300 ++++ linux-2.4.21-suse/fs/ext3/hash.c 2003-10-29 23:17:20.000000000 +0300 +@@ -0,0 +1,215 @@ ++/* ++ * linux/fs/ext3/hash.c ++ * ++ * Copyright (C) 2002 by Theodore Ts'o ++ * ++ * This file is released under the GPL v2. ++ * ++ * This file may be redistributed under the terms of the GNU Public ++ * License. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#define DELTA 0x9E3779B9 ++ ++static void TEA_transform(__u32 buf[4], __u32 const in[]) ++{ ++ __u32 sum = 0; ++ __u32 b0 = buf[0], b1 = buf[1]; ++ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; ++ int n = 16; ++ ++ do { ++ sum += DELTA; ++ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); ++ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); ++ } while(--n); ++ ++ buf[0] += b0; ++ buf[1] += b1; ++} ++ ++/* F, G and H are basic MD4 functions: selection, majority, parity */ ++#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) ++#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) ++#define H(x, y, z) ((x) ^ (y) ^ (z)) ++ ++/* ++ * The generic round function. The application is so specific that ++ * we don't bother protecting all the arguments with parens, as is generally ++ * good macro practice, in favor of extra legibility. ++ * Rotation is separate from addition to prevent recomputation ++ */ ++#define ROUND(f, a, b, c, d, x, s) \ ++ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) ++#define K1 0 ++#define K2 013240474631UL ++#define K3 015666365641UL ++ ++/* ++ * Basic cut-down MD4 transform. Returns only 32 bits of result. 
++ */ ++static void halfMD4Transform (__u32 buf[4], __u32 const in[]) ++{ ++ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; ++ ++ /* Round 1 */ ++ ROUND(F, a, b, c, d, in[0] + K1, 3); ++ ROUND(F, d, a, b, c, in[1] + K1, 7); ++ ROUND(F, c, d, a, b, in[2] + K1, 11); ++ ROUND(F, b, c, d, a, in[3] + K1, 19); ++ ROUND(F, a, b, c, d, in[4] + K1, 3); ++ ROUND(F, d, a, b, c, in[5] + K1, 7); ++ ROUND(F, c, d, a, b, in[6] + K1, 11); ++ ROUND(F, b, c, d, a, in[7] + K1, 19); ++ ++ /* Round 2 */ ++ ROUND(G, a, b, c, d, in[1] + K2, 3); ++ ROUND(G, d, a, b, c, in[3] + K2, 5); ++ ROUND(G, c, d, a, b, in[5] + K2, 9); ++ ROUND(G, b, c, d, a, in[7] + K2, 13); ++ ROUND(G, a, b, c, d, in[0] + K2, 3); ++ ROUND(G, d, a, b, c, in[2] + K2, 5); ++ ROUND(G, c, d, a, b, in[4] + K2, 9); ++ ROUND(G, b, c, d, a, in[6] + K2, 13); ++ ++ /* Round 3 */ ++ ROUND(H, a, b, c, d, in[3] + K3, 3); ++ ROUND(H, d, a, b, c, in[7] + K3, 9); ++ ROUND(H, c, d, a, b, in[2] + K3, 11); ++ ROUND(H, b, c, d, a, in[6] + K3, 15); ++ ROUND(H, a, b, c, d, in[1] + K3, 3); ++ ROUND(H, d, a, b, c, in[5] + K3, 9); ++ ROUND(H, c, d, a, b, in[0] + K3, 11); ++ ROUND(H, b, c, d, a, in[4] + K3, 15); ++ ++ buf[0] += a; ++ buf[1] += b; ++ buf[2] += c; ++ buf[3] += d; ++} ++ ++#undef ROUND ++#undef F ++#undef G ++#undef H ++#undef K1 ++#undef K2 ++#undef K3 ++ ++/* The old legacy hash */ ++static __u32 dx_hack_hash (const char *name, int len) ++{ ++ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; ++ while (len--) { ++ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); ++ ++ if (hash & 0x80000000) hash -= 0x7fffffff; ++ hash1 = hash0; ++ hash0 = hash; ++ } ++ return (hash0 << 1); ++} ++ ++static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) ++{ ++ __u32 pad, val; ++ int i; ++ ++ pad = (__u32)len | ((__u32)len << 8); ++ pad |= pad << 16; ++ ++ val = pad; ++ if (len > num*4) ++ len = num * 4; ++ for (i=0; i < len; i++) { ++ if ((i % 4) == 0) ++ val = pad; ++ val = msg[i] + (val << 8); ++ if ((i % 4) == 3) { ++ 
*buf++ = val; ++ val = pad; ++ num--; ++ } ++ } ++ if (--num >= 0) ++ *buf++ = val; ++ while (--num >= 0) ++ *buf++ = pad; ++} ++ ++/* ++ * Returns the hash of a filename. If len is 0 and name is NULL, then ++ * this function can be used to test whether or not a hash version is ++ * supported. ++ * ++ * The seed is an 4 longword (32 bits) "secret" which can be used to ++ * uniquify a hash. If the seed is all zero's, then some default seed ++ * may be used. ++ * ++ * A particular hash version specifies whether or not the seed is ++ * represented, and whether or not the returned hash is 32 bits or 64 ++ * bits. 32 bit hashes will return 0 for the minor hash. ++ */ ++int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) ++{ ++ __u32 hash; ++ __u32 minor_hash = 0; ++ const char *p; ++ int i; ++ __u32 in[8], buf[4]; ++ ++ /* Initialize the default seed for the hash checksum functions */ ++ buf[0] = 0x67452301; ++ buf[1] = 0xefcdab89; ++ buf[2] = 0x98badcfe; ++ buf[3] = 0x10325476; ++ ++ /* Check to see if the seed is all zero's */ ++ if (hinfo->seed) { ++ for (i=0; i < 4; i++) { ++ if (hinfo->seed[i]) ++ break; ++ } ++ if (i < 4) ++ memcpy(buf, hinfo->seed, sizeof(buf)); ++ } ++ ++ switch (hinfo->hash_version) { ++ case DX_HASH_LEGACY: ++ hash = dx_hack_hash(name, len); ++ break; ++ case DX_HASH_HALF_MD4: ++ p = name; ++ while (len > 0) { ++ str2hashbuf(p, len, in, 8); ++ halfMD4Transform(buf, in); ++ len -= 32; ++ p += 32; ++ } ++ minor_hash = buf[2]; ++ hash = buf[1]; ++ break; ++ case DX_HASH_TEA: ++ p = name; ++ while (len > 0) { ++ str2hashbuf(p, len, in, 4); ++ TEA_transform(buf, in); ++ len -= 16; ++ p += 16; ++ } ++ hash = buf[0]; ++ minor_hash = buf[1]; ++ break; ++ default: ++ hinfo->hash = 0; ++ return -1; ++ } ++ hinfo->hash = hash & ~1; ++ hinfo->minor_hash = minor_hash; ++ return 0; ++} +Index: linux-2.4.21-suse/fs/ext3/Makefile +=================================================================== +--- 
linux-2.4.21-suse.orig/fs/ext3/Makefile 2003-10-29 22:39:14.000000000 +0300 ++++ linux-2.4.21-suse/fs/ext3/Makefile 2003-10-29 23:17:20.000000000 +0300 +@@ -12,7 +12,7 @@ + export-objs := super.o inode.o + + obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o ++ ioctl.o namei.o super.o symlink.o hash.o + obj-m := $(O_TARGET) + + include $(TOPDIR)/Rules.make +Index: linux-2.4.21-suse/fs/ext3/namei.c +=================================================================== +--- linux-2.4.21-suse.orig/fs/ext3/namei.c 2003-06-13 18:51:37.000000000 +0400 ++++ linux-2.4.21-suse/fs/ext3/namei.c 2003-10-29 23:25:23.000000000 +0300 +@@ -16,6 +16,12 @@ + * David S. Miller (davem@caip.rutgers.edu), 1995 + * Directory entry file type support and forward compatibility hooks + * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 ++ * Hash Tree Directory indexing (c) ++ * Daniel Phillips, 2001 ++ * Hash Tree Directory indexing porting ++ * Christopher Li, 2002 ++ * Hash Tree Directory indexing cleanup ++ * Theodore Ts'o, 2002 + */ + + #include +@@ -38,6 +44,642 @@ + #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) + #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) + ++static struct buffer_head *ext3_append(handle_t *handle, ++ struct inode *inode, ++ u32 *block, int *err) ++{ ++ struct buffer_head *bh; ++ ++ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; ++ ++ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { ++ inode->i_size += inode->i_sb->s_blocksize; ++ EXT3_I(inode)->i_disksize = inode->i_size; ++ ext3_journal_get_write_access(handle,bh); ++ } ++ return bh; ++} ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#ifndef swap ++#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) ++#endif ++ ++typedef struct { u32 v; } le_u32; ++typedef struct { u16 v; } le_u16; ++ ++#ifdef DX_DEBUG ++#define dxtrace(command) command ++#else ++#define 
dxtrace(command) ++#endif ++ ++struct fake_dirent ++{ ++ /*le*/u32 inode; ++ /*le*/u16 rec_len; ++ u8 name_len; ++ u8 file_type; ++}; ++ ++struct dx_countlimit ++{ ++ le_u16 limit; ++ le_u16 count; ++}; ++ ++struct dx_entry ++{ ++ le_u32 hash; ++ le_u32 block; ++}; ++ ++/* ++ * dx_root_info is laid out so that if it should somehow get overlaid by a ++ * dirent the two low bits of the hash version will be zero. Therefore, the ++ * hash version mod 4 should never be 0. Sincerely, the paranoia department. ++ */ ++ ++struct dx_root ++{ ++ struct fake_dirent dot; ++ char dot_name[4]; ++ struct fake_dirent dotdot; ++ char dotdot_name[4]; ++ struct dx_root_info ++ { ++ le_u32 reserved_zero; ++ u8 hash_version; ++ u8 info_length; /* 8 */ ++ u8 indirect_levels; ++ u8 unused_flags; ++ } ++ info; ++ struct dx_entry entries[0]; ++}; ++ ++struct dx_node ++{ ++ struct fake_dirent fake; ++ struct dx_entry entries[0]; ++}; ++ ++ ++struct dx_frame ++{ ++ struct buffer_head *bh; ++ struct dx_entry *entries; ++ struct dx_entry *at; ++}; ++ ++struct dx_map_entry ++{ ++ u32 hash; ++ u32 offs; ++}; ++ ++#ifdef CONFIG_EXT3_INDEX ++static inline unsigned dx_get_block (struct dx_entry *entry); ++static void dx_set_block (struct dx_entry *entry, unsigned value); ++static inline unsigned dx_get_hash (struct dx_entry *entry); ++static void dx_set_hash (struct dx_entry *entry, unsigned value); ++static unsigned dx_get_count (struct dx_entry *entries); ++static unsigned dx_get_limit (struct dx_entry *entries); ++static void dx_set_count (struct dx_entry *entries, unsigned value); ++static void dx_set_limit (struct dx_entry *entries, unsigned value); ++static unsigned dx_root_limit (struct inode *dir, unsigned infosize); ++static unsigned dx_node_limit (struct inode *dir); ++static struct dx_frame *dx_probe(struct dentry *dentry, ++ struct inode *dir, ++ struct dx_hash_info *hinfo, ++ struct dx_frame *frame, ++ int *err); ++static void dx_release (struct dx_frame *frames); ++static int 
dx_make_map (struct ext3_dir_entry_2 *de, int size, ++ struct dx_hash_info *hinfo, struct dx_map_entry map[]); ++static void dx_sort_map(struct dx_map_entry *map, unsigned count); ++static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, ++ struct dx_map_entry *offsets, int count); ++static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); ++static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); ++static int ext3_htree_next_block(struct inode *dir, __u32 hash, ++ struct dx_frame *frame, ++ struct dx_frame *frames, int *err, ++ __u32 *start_hash); ++static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, ++ struct ext3_dir_entry_2 **res_dir, int *err); ++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, ++ struct inode *inode); ++ ++/* ++ * Future: use high four bits of block for coalesce-on-delete flags ++ * Mask them off for now. ++ */ ++ ++static inline unsigned dx_get_block (struct dx_entry *entry) ++{ ++ return le32_to_cpu(entry->block.v) & 0x00ffffff; ++} ++ ++static inline void dx_set_block (struct dx_entry *entry, unsigned value) ++{ ++ entry->block.v = cpu_to_le32(value); ++} ++ ++static inline unsigned dx_get_hash (struct dx_entry *entry) ++{ ++ return le32_to_cpu(entry->hash.v); ++} ++ ++static inline void dx_set_hash (struct dx_entry *entry, unsigned value) ++{ ++ entry->hash.v = cpu_to_le32(value); ++} ++ ++static inline unsigned dx_get_count (struct dx_entry *entries) ++{ ++ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); ++} ++ ++static inline unsigned dx_get_limit (struct dx_entry *entries) ++{ ++ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); ++} ++ ++static inline void dx_set_count (struct dx_entry *entries, unsigned value) ++{ ++ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); ++} ++ ++static inline void dx_set_limit (struct dx_entry *entries, unsigned value) ++{ ++ ((struct dx_countlimit *) entries)->limit.v = 
cpu_to_le16(value); ++} ++ ++static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) ++{ ++ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - ++ EXT3_DIR_REC_LEN(2) - infosize; ++ return 0? 20: entry_space / sizeof(struct dx_entry); ++} ++ ++static inline unsigned dx_node_limit (struct inode *dir) ++{ ++ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); ++ return 0? 22: entry_space / sizeof(struct dx_entry); ++} ++ ++/* ++ * Debug ++ */ ++#ifdef DX_DEBUG ++struct stats ++{ ++ unsigned names; ++ unsigned space; ++ unsigned bcount; ++}; ++ ++static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, ++ int size, int show_names) ++{ ++ unsigned names = 0, space = 0; ++ char *base = (char *) de; ++ struct dx_hash_info h = *hinfo; ++ ++ printk("names: "); ++ while ((char *) de < base + size) ++ { ++ if (de->inode) ++ { ++ if (show_names) ++ { ++ int len = de->name_len; ++ char *name = de->name; ++ while (len--) printk("%c", *name++); ++ ext3fs_dirhash(de->name, de->name_len, &h); ++ printk(":%x.%u ", h.hash, ++ ((char *) de - base)); ++ } ++ space += EXT3_DIR_REC_LEN(de->name_len); ++ names++; ++ } ++ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); ++ } ++ printk("(%i)\n", names); ++ return (struct stats) { names, space, 1 }; ++} ++ ++struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, ++ struct dx_entry *entries, int levels) ++{ ++ unsigned blocksize = dir->i_sb->s_blocksize; ++ unsigned count = dx_get_count (entries), names = 0, space = 0, i; ++ unsigned bcount = 0; ++ struct buffer_head *bh; ++ int err; ++ printk("%i indexed blocks...\n", count); ++ for (i = 0; i < count; i++, entries++) ++ { ++ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; ++ u32 range = i < count - 1? 
(dx_get_hash(entries + 1) - hash): ~hash; ++ struct stats stats; ++ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); ++ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; ++ stats = levels? ++ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): ++ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); ++ names += stats.names; ++ space += stats.space; ++ bcount += stats.bcount; ++ brelse (bh); ++ } ++ if (bcount) ++ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", ++ names, space/bcount,(space/bcount)*100/blocksize); ++ return (struct stats) { names, space, bcount}; ++} ++#endif /* DX_DEBUG */ ++ ++/* ++ * Probe for a directory leaf block to search. ++ * ++ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format ++ * error in the directory index, and the caller should fall back to ++ * searching the directory normally. The callers of dx_probe **MUST** ++ * check for this error code, and make sure it never gets reflected ++ * back to userspace. 
++ */ ++static struct dx_frame * ++dx_probe(struct dentry *dentry, struct inode *dir, ++ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) ++{ ++ unsigned count, indirect; ++ struct dx_entry *at, *entries, *p, *q, *m; ++ struct dx_root *root; ++ struct buffer_head *bh; ++ struct dx_frame *frame = frame_in; ++ u32 hash; ++ ++ frame->bh = NULL; ++ if (dentry) ++ dir = dentry->d_parent->d_inode; ++ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) ++ goto fail; ++ root = (struct dx_root *) bh->b_data; ++ if (root->info.hash_version != DX_HASH_TEA && ++ root->info.hash_version != DX_HASH_HALF_MD4 && ++ root->info.hash_version != DX_HASH_LEGACY) { ++ ext3_warning(dir->i_sb, __FUNCTION__, ++ "Unrecognised inode hash code %d", ++ root->info.hash_version); ++ brelse(bh); ++ *err = ERR_BAD_DX_DIR; ++ goto fail; ++ } ++ hinfo->hash_version = root->info.hash_version; ++ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; ++ if (dentry) ++ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); ++ hash = hinfo->hash; ++ ++ if (root->info.unused_flags & 1) { ++ ext3_warning(dir->i_sb, __FUNCTION__, ++ "Unimplemented inode hash flags: %#06x", ++ root->info.unused_flags); ++ brelse(bh); ++ *err = ERR_BAD_DX_DIR; ++ goto fail; ++ } ++ ++ if ((indirect = root->info.indirect_levels) > 1) { ++ ext3_warning(dir->i_sb, __FUNCTION__, ++ "Unimplemented inode hash depth: %#06x", ++ root->info.indirect_levels); ++ brelse(bh); ++ *err = ERR_BAD_DX_DIR; ++ goto fail; ++ } ++ ++ entries = (struct dx_entry *) (((char *)&root->info) + ++ root->info.info_length); ++ assert(dx_get_limit(entries) == dx_root_limit(dir, ++ root->info.info_length)); ++ dxtrace (printk("Look up %x", hash)); ++ while (1) ++ { ++ count = dx_get_count(entries); ++ assert (count && count <= dx_get_limit(entries)); ++ p = entries + 1; ++ q = entries + count - 1; ++ while (p <= q) ++ { ++ m = p + (q - p)/2; ++ dxtrace(printk(".")); ++ if (dx_get_hash(m) > hash) ++ q = m - 1; ++ else ++ p = m + 1; ++ } ++ ++ 
if (0) // linear search cross check ++ { ++ unsigned n = count - 1; ++ at = entries; ++ while (n--) ++ { ++ dxtrace(printk(",")); ++ if (dx_get_hash(++at) > hash) ++ { ++ at--; ++ break; ++ } ++ } ++ assert (at == p - 1); ++ } ++ ++ at = p - 1; ++ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); ++ frame->bh = bh; ++ frame->entries = entries; ++ frame->at = at; ++ if (!indirect--) return frame; ++ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) ++ goto fail2; ++ at = entries = ((struct dx_node *) bh->b_data)->entries; ++ assert (dx_get_limit(entries) == dx_node_limit (dir)); ++ frame++; ++ } ++fail2: ++ while (frame >= frame_in) { ++ brelse(frame->bh); ++ frame--; ++ } ++fail: ++ return NULL; ++} ++ ++static void dx_release (struct dx_frame *frames) ++{ ++ if (frames[0].bh == NULL) ++ return; ++ ++ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) ++ brelse(frames[1].bh); ++ brelse(frames[0].bh); ++} ++ ++/* ++ * This function increments the frame pointer to search the next leaf ++ * block, and reads in the necessary intervening nodes if the search ++ * should be necessary. Whether or not the search is necessary is ++ * controlled by the hash parameter. If the hash value is even, then ++ * the search is only continued if the next block starts with that ++ * hash value. This is used if we are searching for a specific file. ++ * ++ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. ++ * ++ * This function returns 1 if the caller should continue to search, ++ * or 0 if it should not. If there is an error reading one of the ++ * index blocks, it will return -1. ++ * ++ * If start_hash is non-null, it will be filled in with the starting ++ * hash of the next page. 
++ */ ++static int ext3_htree_next_block(struct inode *dir, __u32 hash, ++ struct dx_frame *frame, ++ struct dx_frame *frames, int *err, ++ __u32 *start_hash) ++{ ++ struct dx_frame *p; ++ struct buffer_head *bh; ++ int num_frames = 0; ++ __u32 bhash; ++ ++ *err = -ENOENT; /* callers expect a negative errno */ ++ p = frame; ++ /* ++ * Find the next leaf page by incrementing the frame pointer. ++ * If we run out of entries in the interior node, loop around and ++ * increment pointer in the parent node. When we break out of ++ * this loop, num_frames indicates the number of interior ++ * nodes need to be read. ++ */ ++ while (1) { ++ if (++(p->at) < p->entries + dx_get_count(p->entries)) ++ break; ++ if (p == frames) ++ return 0; ++ num_frames++; ++ p--; ++ } ++ ++ /* ++ * If the hash is 1, then continue only if the next page has a ++ * continuation hash of any value. This is used for readdir ++ * handling. Otherwise, check to see if the hash matches the ++ * desired continuation hash. If it doesn't, return since ++ * there's no point to read in the successive index pages. ++ */ ++ bhash = dx_get_hash(p->at); ++ if (start_hash) ++ *start_hash = bhash; ++ if ((hash & 1) == 0) { ++ if ((bhash & ~1) != hash) ++ return 0; ++ } ++ /* ++ * If the hash is HASH_NB_ALWAYS, we always go to the next ++ * block so no check is necessary ++ */ ++ while (num_frames--) { ++ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), ++ 0, err))) ++ return -1; /* Failure */ ++ p++; ++ brelse (p->bh); ++ p->bh = bh; ++ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; ++ } ++ return 1; ++} ++ ++ ++/* ++ * p is at least 6 bytes before the end of page ++ */ ++static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) ++{ ++ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); ++} ++ ++/* ++ * This function fills a red-black tree with information from a ++ * directory. We start scanning the directory in hash order, starting ++ * at start_hash and start_minor_hash.
++ * ++ * This function returns the number of entries inserted into the tree, ++ * or a negative error code. ++ */ ++int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, ++ __u32 start_minor_hash, __u32 *next_hash) ++{ ++ struct dx_hash_info hinfo; ++ struct buffer_head *bh; ++ struct ext3_dir_entry_2 *de, *top; ++ struct dx_frame frames[2], *frame; /* per-call state; must not be shared between calls */ ++ struct inode *dir; ++ int block, err; ++ int count = 0; ++ int ret; ++ __u32 hashval; ++ ++ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, ++ start_minor_hash)); ++ dir = dir_file->f_dentry->d_inode; ++ hinfo.hash = start_hash; ++ hinfo.minor_hash = 0; ++ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); ++ if (!frame) ++ return err; ++ ++ /* Add '.' and '..' from the htree header */ ++ if (!start_hash && !start_minor_hash) { ++ de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; ++ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) ++ goto errout; ++ de = ext3_next_entry(de); ++ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) ++ goto errout; ++ count += 2; ++ } ++ ++ while (1) { ++ block = dx_get_block(frame->at); ++ dxtrace(printk("Reading block %d\n", block)); ++ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) ++ goto errout; ++ ++ de = (struct ext3_dir_entry_2 *) bh->b_data; ++ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - ++ EXT3_DIR_REC_LEN(0)); ++ for (; de < top; de = ext3_next_entry(de)) { ++ ext3fs_dirhash(de->name, de->name_len, &hinfo); ++ if ((hinfo.hash < start_hash) || ++ ((hinfo.hash == start_hash) && ++ (hinfo.minor_hash < start_minor_hash))) ++ continue; ++ if ((err = ext3_htree_store_dirent(dir_file, ++ hinfo.hash, hinfo.minor_hash, de)) != 0) ++ goto errout; /* XXX: bh is still held here and errout does not release it */ ++ count++; ++ } ++ brelse (bh); ++ hashval = ~1; ++ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, ++ frame, frames, &err, &hashval); ++ if (next_hash) ++ *next_hash = hashval; ++ if (ret == -1) ++ goto errout; ++ /*
++ * Stop if: (a) there are no more entries, or ++ * (b) we have inserted at least one entry and the ++ * next hash value is not a continuation ++ */ ++ if ((ret == 0) || ++ (count && ((hashval & 1) == 0))) ++ break; ++ } ++ dx_release(frames); ++ dxtrace(printk("Fill tree: returned %d entries\n", count)); ++ return count; ++errout: ++ dx_release(frames); ++ return (err); ++} ++ ++ ++/* ++ * Directory block splitting, compacting ++ */ ++ ++static int dx_make_map (struct ext3_dir_entry_2 *de, int size, ++ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) ++{ ++ int count = 0; ++ char *base = (char *) de; ++ struct dx_hash_info h = *hinfo; ++ ++ while ((char *) de < base + size) ++ { ++ if (de->name_len && de->inode) { ++ ext3fs_dirhash(de->name, de->name_len, &h); ++ map_tail--; ++ map_tail->hash = h.hash; ++ map_tail->offs = (u32) ((char *) de - base); ++ count++; ++ } ++ /* XXX: do we need to check rec_len == 0 case? -Chris */ ++ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); ++ } ++ return count; ++} ++ ++static void dx_sort_map (struct dx_map_entry *map, unsigned count) ++{ ++ struct dx_map_entry *p, *q, *top = map + count - 1; ++ int more; ++ /* Combsort until bubble sort doesn't suck */ ++ while (count > 2) ++ { ++ count = count*10/13; ++ if (count - 9 < 2) /* 9, 10 -> 11 */ ++ count = 11; ++ for (p = top, q = p - count; q >= map; p--, q--) ++ if (p->hash < q->hash) ++ swap(*p, *q); ++ } ++ /* Garden variety bubble sort */ ++ do { ++ more = 0; ++ q = top; ++ while (q-- > map) ++ { ++ if (q[1].hash >= q[0].hash) ++ continue; ++ swap(*(q+1), *q); ++ more = 1; ++ } ++ } while(more); ++} ++ ++static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) ++{ ++ struct dx_entry *entries = frame->entries; ++ struct dx_entry *old = frame->at, *new = old + 1; ++ int count = dx_get_count(entries); ++ ++ assert(count < dx_get_limit(entries)); ++ assert(old < entries + count); ++ memmove(new + 1, new, (char *)(entries + 
count) - (char *)(new)); ++ dx_set_hash(new, hash); ++ dx_set_block(new, block); ++ dx_set_count(entries, count + 1); ++} ++#endif ++ ++ ++static void ext3_update_dx_flag(struct inode *inode) ++{ ++ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, ++ EXT3_FEATURE_COMPAT_DIR_INDEX)) ++ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; ++} ++ + /* + * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. + * +@@ -94,6 +736,7 @@ + return 0; + } + ++ + /* + * ext3_find_entry() + * +@@ -105,6 +748,8 @@ + * The returned buffer_head has ->b_count elevated. The caller is expected + * to brelse() it when appropriate. + */ ++ ++ + static struct buffer_head * ext3_find_entry (struct dentry *dentry, + struct ext3_dir_entry_2 ** res_dir) + { +@@ -119,12 +764,32 @@ + int num = 0; + int nblocks, i, err; + struct inode *dir = dentry->d_parent->d_inode; ++ int namelen; ++ const u8 *name; ++ unsigned blocksize; + + *res_dir = NULL; + sb = dir->i_sb; +- ++ blocksize = sb->s_blocksize; ++ namelen = dentry->d_name.len; ++ name = dentry->d_name.name; ++ if (namelen > EXT3_NAME_LEN) ++ return NULL; ++#ifdef CONFIG_EXT3_INDEX ++ if (is_dx(dir)) { ++ bh = ext3_dx_find_entry(dentry, res_dir, &err); ++ /* ++ * On success, or if the error was file not found, ++ * return. Otherwise, fall back to doing a search the ++ * old fashioned way. 
++ */ ++ if (bh || (err != ERR_BAD_DX_DIR)) ++ return bh; ++ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); ++ } ++#endif + nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); +- start = dir->u.ext3_i.i_dir_start_lookup; ++ start = EXT3_I(dir)->i_dir_start_lookup; + if (start >= nblocks) + start = 0; + block = start; +@@ -165,7 +830,7 @@ + i = search_dirblock(bh, dir, dentry, + block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); + if (i == 1) { +- dir->u.ext3_i.i_dir_start_lookup = block; ++ EXT3_I(dir)->i_dir_start_lookup = block; + ret = bh; + goto cleanup_and_exit; + } else { +@@ -196,6 +861,66 @@ + return ret; + } + ++#ifdef CONFIG_EXT3_INDEX ++static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, ++ struct ext3_dir_entry_2 **res_dir, int *err) ++{ ++ struct super_block * sb; ++ struct dx_hash_info hinfo; ++ u32 hash; ++ struct dx_frame frames[2], *frame; ++ struct ext3_dir_entry_2 *de, *top; ++ struct buffer_head *bh; ++ unsigned long block; ++ int retval; ++ int namelen = dentry->d_name.len; ++ const u8 *name = dentry->d_name.name; ++ struct inode *dir = dentry->d_parent->d_inode; ++ ++ sb = dir->i_sb; ++ if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err))) ++ return NULL; ++ hash = hinfo.hash; ++ do { ++ block = dx_get_block(frame->at); ++ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) ++ goto errout; ++ de = (struct ext3_dir_entry_2 *) bh->b_data; ++ top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - ++ EXT3_DIR_REC_LEN(0)); ++ for (; de < top; de = ext3_next_entry(de)) ++ if (ext3_match (namelen, name, de)) { ++ if (!ext3_check_dir_entry("ext3_find_entry", ++ dir, de, bh, ++ (block<<EXT3_BLOCK_SIZE_BITS(sb)) ++ +((char *)de - bh->b_data))) { ++ brelse (bh); ++ goto errout; ++ } ++ *res_dir = de; ++ dx_release (frames); ++ return bh; ++ } ++ brelse (bh); ++ /* Check to see if we should continue to search */ ++ retval = ext3_htree_next_block(dir, hash, frame, ++ frames, err, 0); ++ if (retval == -1) { ++ ext3_warning(sb, __FUNCTION__, ++ "error reading index
page in directory #%lu", ++ dir->i_ino); ++ goto errout; ++ } ++ } while (retval == 1); ++ ++ *err = -ENOENT; ++errout: ++ dxtrace(printk("%s not found\n", name)); ++ dx_release (frames); ++ return NULL; ++} ++#endif ++ + static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) + { + struct inode * inode; +@@ -212,8 +937,9 @@ + brelse (bh); + inode = iget(dir->i_sb, ino); + +- if (!inode) ++ if (!inode) { + return ERR_PTR(-EACCES); ++ } + } + d_add(dentry, inode); + return NULL; +@@ -237,6 +963,301 @@ + de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; + } + ++#ifdef CONFIG_EXT3_INDEX ++static struct ext3_dir_entry_2 * ++dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) ++{ ++ unsigned rec_len = 0; ++ ++ while (count--) { ++ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); ++ rec_len = EXT3_DIR_REC_LEN(de->name_len); ++ memcpy (to, de, rec_len); ++ ((struct ext3_dir_entry_2 *) to)->rec_len = cpu_to_le16(rec_len); /* rec_len is stored little-endian */ ++ de->inode = 0; ++ map++; ++ to += rec_len; ++ } ++ return (struct ext3_dir_entry_2 *) (to - rec_len); ++} ++ ++static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) ++{ ++ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; ++ unsigned rec_len = 0; ++ ++ prev = to = de; ++ while ((char*)de < base + size) { ++ next = (struct ext3_dir_entry_2 *) ((char *) de + ++ le16_to_cpu(de->rec_len)); ++ if (de->inode && de->name_len) { ++ rec_len = EXT3_DIR_REC_LEN(de->name_len); ++ if (de > to) ++ memmove(to, de, rec_len); ++ to->rec_len = cpu_to_le16(rec_len); /* rec_len is stored little-endian */ ++ prev = to; ++ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); ++ } ++ de = next; ++ } ++ return prev; ++} ++ ++static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, ++ struct buffer_head **bh,struct dx_frame *frame, ++ struct dx_hash_info *hinfo, int *error) ++{ ++ unsigned blocksize = dir->i_sb->s_blocksize; ++ unsigned count, continued; ++ struct buffer_head *bh2;
++ u32 newblock; ++ u32 hash2; ++ struct dx_map_entry *map; ++ char *data1 = (*bh)->b_data, *data2; ++ unsigned split; ++ struct ext3_dir_entry_2 *de = NULL, *de2; ++ int err; ++ ++ bh2 = ext3_append (handle, dir, &newblock, error); ++ if (!(bh2)) { ++ brelse(*bh); ++ *bh = NULL; ++ goto errout; ++ } ++ ++ BUFFER_TRACE(*bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, *bh); ++ if (err) { ++ journal_error: ++ brelse(*bh); ++ brelse(bh2); ++ *bh = NULL; ++ ext3_std_error(dir->i_sb, err); ++ goto errout; ++ } ++ BUFFER_TRACE(frame->bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, frame->bh); ++ if (err) ++ goto journal_error; ++ ++ data2 = bh2->b_data; ++ ++ /* create map in the end of data2 block */ ++ map = (struct dx_map_entry *) (data2 + blocksize); ++ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, ++ blocksize, hinfo, map); ++ map -= count; ++ split = count/2; // need to adjust to actual middle ++ dx_sort_map (map, count); ++ hash2 = map[split].hash; ++ continued = hash2 == map[split - 1].hash; ++ dxtrace(printk("Split block %i at %x, %i/%i\n", ++ dx_get_block(frame->at), hash2, split, count-split)); ++ ++ /* Fancy dance to stay within two buffers */ ++ de2 = dx_move_dirents(data1, data2, map + split, count - split); ++ de = dx_pack_dirents(data1,blocksize); ++ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); ++ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); ++ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); ++ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); ++ ++ /* Which block gets the new entry? 
*/ ++ if (hinfo->hash >= hash2) ++ { ++ swap(*bh, bh2); ++ de = de2; ++ } ++ dx_insert_block (frame, hash2 + continued, newblock); ++ err = ext3_journal_dirty_metadata (handle, bh2); ++ if (err) ++ goto journal_error; ++ err = ext3_journal_dirty_metadata (handle, frame->bh); ++ if (err) ++ goto journal_error; ++ brelse (bh2); ++ dxtrace(dx_show_index ("frame", frame->entries)); ++errout: ++ return de; ++} ++#endif ++ ++ ++/* ++ * Add a new entry into a directory (leaf) block. If de is non-NULL, ++ * it points to a directory entry which is guaranteed to be large ++ * enough for new directory entry. If de is NULL, then ++ * add_dirent_to_buf will attempt search the directory block for ++ * space. It will return -ENOSPC if no space is available, and -EIO ++ * and -EEXIST if directory entry already exists. ++ * ++ * NOTE! bh is NOT released in the case where ENOSPC is returned. In ++ * all other cases bh is released. ++ */ ++static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, ++ struct inode *inode, struct ext3_dir_entry_2 *de, ++ struct buffer_head * bh) ++{ ++ struct inode *dir = dentry->d_parent->d_inode; ++ const char *name = dentry->d_name.name; ++ int namelen = dentry->d_name.len; ++ unsigned long offset = 0; ++ unsigned short reclen; ++ int nlen, rlen, err; ++ char *top; ++ ++ reclen = EXT3_DIR_REC_LEN(namelen); ++ if (!de) { ++ de = (struct ext3_dir_entry_2 *)bh->b_data; ++ top = bh->b_data + dir->i_sb->s_blocksize - reclen; ++ while ((char *) de <= top) { ++ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, ++ bh, offset)) { ++ brelse (bh); ++ return -EIO; ++ } ++ if (ext3_match (namelen, name, de)) { ++ brelse (bh); ++ return -EEXIST; ++ } ++ nlen = EXT3_DIR_REC_LEN(de->name_len); ++ rlen = le16_to_cpu(de->rec_len); ++ if ((de->inode? 
rlen - nlen: rlen) >= reclen) ++ break; ++ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); ++ offset += rlen; ++ } ++ if ((char *) de > top) ++ return -ENOSPC; ++ } ++ BUFFER_TRACE(bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, bh); ++ if (err) { ++ ext3_std_error(dir->i_sb, err); ++ brelse(bh); ++ return err; ++ } ++ ++ /* By now the buffer is marked for journaling */ ++ nlen = EXT3_DIR_REC_LEN(de->name_len); ++ rlen = le16_to_cpu(de->rec_len); ++ if (de->inode) { ++ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); ++ de1->rec_len = cpu_to_le16(rlen - nlen); ++ de->rec_len = cpu_to_le16(nlen); ++ de = de1; ++ } ++ de->file_type = EXT3_FT_UNKNOWN; ++ if (inode) { ++ de->inode = cpu_to_le32(inode->i_ino); ++ ext3_set_de_type(dir->i_sb, de, inode->i_mode); ++ } else ++ de->inode = 0; ++ de->name_len = namelen; ++ memcpy (de->name, name, namelen); ++ /* ++ * XXX shouldn't update any times until successful ++ * completion of syscall, but too many callers depend ++ * on this. ++ * ++ * XXX similarly, too many callers depend on ++ * ext3_new_inode() setting the times, but error ++ * recovery deletes the inode, so the worst that can ++ * happen is that the times are slightly out of date ++ * and/or different from the directory change time. ++ */ ++ dir->i_mtime = dir->i_ctime = CURRENT_TIME; ++ ext3_update_dx_flag(dir); ++ dir->i_version = ++event; ++ ext3_mark_inode_dirty(handle, dir); ++ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); ++ err = ext3_journal_dirty_metadata(handle, bh); ++ if (err) ++ ext3_std_error(dir->i_sb, err); ++ brelse(bh); ++ return 0; ++} ++ ++#ifdef CONFIG_EXT3_INDEX ++/* ++ * This converts a one block unindexed directory to a 3 block indexed ++ * directory, and adds the dentry to the indexed directory. 
++ */ ++static int make_indexed_dir(handle_t *handle, struct dentry *dentry, ++ struct inode *inode, struct buffer_head *bh) ++{ ++ struct inode *dir = dentry->d_parent->d_inode; ++ const char *name = dentry->d_name.name; ++ int namelen = dentry->d_name.len; ++ struct buffer_head *bh2; ++ struct dx_root *root; ++ struct dx_frame frames[2], *frame; ++ struct dx_entry *entries; ++ struct ext3_dir_entry_2 *de, *de2; ++ char *data1, *top; ++ unsigned len; ++ int retval; ++ unsigned blocksize; ++ struct dx_hash_info hinfo; ++ u32 block; ++ ++ blocksize = dir->i_sb->s_blocksize; ++ dxtrace(printk("Creating index\n")); ++ retval = ext3_journal_get_write_access(handle, bh); ++ if (retval) { ++ ext3_std_error(dir->i_sb, retval); ++ brelse(bh); ++ return retval; ++ } ++ root = (struct dx_root *) bh->b_data; ++ ++ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; ++ bh2 = ext3_append (handle, dir, &block, &retval); ++ if (!(bh2)) { ++ brelse(bh); ++ return retval; ++ } ++ data1 = bh2->b_data; ++ ++ /* The 0th block becomes the root, move the dirents out */ ++ de = (struct ext3_dir_entry_2 *) &root->dotdot; ++ de = (struct ext3_dir_entry_2 *) ((char *)de + le16_to_cpu(de->rec_len)); ++ len = ((char *) root) + blocksize - (char *) de; ++ memcpy (data1, de, len); ++ de = (struct ext3_dir_entry_2 *) data1; ++ top = data1 + len; ++ while ((char *)(de2 = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len))) < top) ++ de = de2; ++ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); ++ /* Initialize the root; the dot dirents already exist */ ++ de = (struct ext3_dir_entry_2 *) (&root->dotdot); ++ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); ++ memset (&root->info, 0, sizeof(root->info)); ++ root->info.info_length = sizeof(root->info); ++ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; ++ entries = root->entries; ++ dx_set_block (entries, 1); ++ dx_set_count (entries, 1); ++ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); ++ ++ /* Initialize as for dx_probe */ ++
hinfo.hash_version = root->info.hash_version; ++ hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed; ++ ext3fs_dirhash(name, namelen, &hinfo); ++ frame = frames; ++ frame->entries = entries; ++ frame->at = entries; ++ frame->bh = bh; ++ bh = bh2; ++ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); ++ dx_release (frames); ++ if (!(de)) ++ return retval; ++ ++ return add_dirent_to_buf(handle, dentry, inode, de, bh); ++} ++#endif ++ + /* + * ext3_add_entry() + * +@@ -247,127 +1268,198 @@ + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. + */ +- +-/* +- * AKPM: the journalling code here looks wrong on the error paths +- */ + static int ext3_add_entry (handle_t *handle, struct dentry *dentry, + struct inode *inode) + { + struct inode *dir = dentry->d_parent->d_inode; +- const char *name = dentry->d_name.name; +- int namelen = dentry->d_name.len; + unsigned long offset; +- unsigned short rec_len; + struct buffer_head * bh; +- struct ext3_dir_entry_2 * de, * de1; ++ struct ext3_dir_entry_2 *de; + struct super_block * sb; + int retval; ++#ifdef CONFIG_EXT3_INDEX ++ int dx_fallback=0; ++#endif ++ unsigned blocksize; ++ unsigned nlen, rlen; ++ u32 block, blocks; + + sb = dir->i_sb; +- +- if (!namelen) ++ blocksize = sb->s_blocksize; ++ if (!dentry->d_name.len) + return -EINVAL; +- bh = ext3_bread (handle, dir, 0, 0, &retval); ++#ifdef CONFIG_EXT3_INDEX ++ if (is_dx(dir)) { ++ retval = ext3_dx_add_entry(handle, dentry, inode); ++ if (!retval || (retval != ERR_BAD_DX_DIR)) ++ return retval; ++ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; ++ dx_fallback++; ++ ext3_mark_inode_dirty(handle, dir); ++ } ++#endif ++ blocks = dir->i_size >> sb->s_blocksize_bits; ++ for (block = 0, offset = 0; block < blocks; block++) { ++ bh = ext3_bread(handle, dir, block, 0, &retval); ++ if(!bh) ++ return retval; ++ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); ++ if (retval != -ENOSPC) ++ return retval; ++ 
++#ifdef CONFIG_EXT3_INDEX ++ if (blocks == 1 && !dx_fallback && ++ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) ++ return make_indexed_dir(handle, dentry, inode, bh); ++#endif ++ brelse(bh); ++ } ++ bh = ext3_append(handle, dir, &block, &retval); + if (!bh) + return retval; +- rec_len = EXT3_DIR_REC_LEN(namelen); +- offset = 0; + de = (struct ext3_dir_entry_2 *) bh->b_data; +- while (1) { +- if ((char *)de >= sb->s_blocksize + bh->b_data) { +- brelse (bh); +- bh = NULL; +- bh = ext3_bread (handle, dir, +- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); +- if (!bh) +- return retval; +- if (dir->i_size <= offset) { +- if (dir->i_size == 0) { +- brelse(bh); +- return -ENOENT; +- } ++ de->inode = 0; ++ de->rec_len = cpu_to_le16(rlen = blocksize); ++ nlen = 0; ++ return add_dirent_to_buf(handle, dentry, inode, de, bh); ++} + +- ext3_debug ("creating next block\n"); ++#ifdef CONFIG_EXT3_INDEX ++/* ++ * Returns 0 for success, or a negative error value ++ */ ++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, ++ struct inode *inode) ++{ ++ struct dx_frame frames[2], *frame; ++ struct dx_entry *entries, *at; ++ struct dx_hash_info hinfo; ++ struct buffer_head * bh; ++ struct inode *dir = dentry->d_parent->d_inode; ++ struct super_block * sb = dir->i_sb; ++ struct ext3_dir_entry_2 *de; ++ int err; + +- BUFFER_TRACE(bh, "get_write_access"); +- ext3_journal_get_write_access(handle, bh); +- de = (struct ext3_dir_entry_2 *) bh->b_data; +- de->inode = 0; +- de->rec_len = le16_to_cpu(sb->s_blocksize); +- dir->u.ext3_i.i_disksize = +- dir->i_size = offset + sb->s_blocksize; +- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; +- ext3_mark_inode_dirty(handle, dir); +- } else { ++ frame = dx_probe(dentry, 0, &hinfo, frames, &err); ++ if (!frame) ++ return err; ++ entries = frame->entries; ++ at = frame->at; + +- ext3_debug ("skipping to next block\n"); ++ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) ++ goto cleanup; + +- de = 
(struct ext3_dir_entry_2 *) bh->b_data; +- } +- } +- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, +- offset)) { +- brelse (bh); +- return -ENOENT; +- } +- if (ext3_match (namelen, name, de)) { +- brelse (bh); +- return -EEXIST; ++ BUFFER_TRACE(bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, bh); ++ if (err) ++ goto journal_error; ++ ++ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); ++ if (err != -ENOSPC) { ++ bh = 0; ++ goto cleanup; ++ } ++ ++ /* Block full, should compress but for now just split */ ++ dxtrace(printk("using %u of %u node entries\n", ++ dx_get_count(entries), dx_get_limit(entries))); ++ /* Need to split index? */ ++ if (dx_get_count(entries) == dx_get_limit(entries)) { ++ u32 newblock; ++ unsigned icount = dx_get_count(entries); ++ int levels = frame - frames; ++ struct dx_entry *entries2; ++ struct dx_node *node2; ++ struct buffer_head *bh2; ++ ++ if (levels && (dx_get_count(frames->entries) == ++ dx_get_limit(frames->entries))) { ++ ext3_warning(sb, __FUNCTION__, ++ "Directory index full!\n"); ++ err = -ENOSPC; ++ goto cleanup; + } +- if ((le32_to_cpu(de->inode) == 0 && +- le16_to_cpu(de->rec_len) >= rec_len) || +- (le16_to_cpu(de->rec_len) >= +- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { +- BUFFER_TRACE(bh, "get_write_access"); +- ext3_journal_get_write_access(handle, bh); +- /* By now the buffer is marked for journaling */ +- offset += le16_to_cpu(de->rec_len); +- if (le32_to_cpu(de->inode)) { +- de1 = (struct ext3_dir_entry_2 *) ((char *) de + +- EXT3_DIR_REC_LEN(de->name_len)); +- de1->rec_len = +- cpu_to_le16(le16_to_cpu(de->rec_len) - +- EXT3_DIR_REC_LEN(de->name_len)); +- de->rec_len = cpu_to_le16( +- EXT3_DIR_REC_LEN(de->name_len)); +- de = de1; ++ bh2 = ext3_append (handle, dir, &newblock, &err); ++ if (!(bh2)) ++ goto cleanup; ++ node2 = (struct dx_node *)(bh2->b_data); ++ entries2 = node2->entries; ++ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); ++ node2->fake.inode = 0; ++ 
BUFFER_TRACE(frame->bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, frame->bh); ++ if (err) ++ goto journal_error; ++ if (levels) { ++ unsigned icount1 = icount/2, icount2 = icount - icount1; ++ unsigned hash2 = dx_get_hash(entries + icount1); ++ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); ++ ++ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ ++ err = ext3_journal_get_write_access(handle, ++ frames[0].bh); ++ if (err) ++ goto journal_error; ++ ++ memcpy ((char *) entries2, (char *) (entries + icount1), ++ icount2 * sizeof(struct dx_entry)); ++ dx_set_count (entries, icount1); ++ dx_set_count (entries2, icount2); ++ dx_set_limit (entries2, dx_node_limit(dir)); ++ ++ /* Which index block gets the new entry? */ ++ if (at - entries >= icount1) { ++ frame->at = at = at - entries - icount1 + entries2; ++ frame->entries = entries = entries2; ++ swap(frame->bh, bh2); + } +- de->file_type = EXT3_FT_UNKNOWN; +- if (inode) { +- de->inode = cpu_to_le32(inode->i_ino); +- ext3_set_de_type(dir->i_sb, de, inode->i_mode); +- } else +- de->inode = 0; +- de->name_len = namelen; +- memcpy (de->name, name, namelen); +- /* +- * XXX shouldn't update any times until successful +- * completion of syscall, but too many callers depend +- * on this. +- * +- * XXX similarly, too many callers depend on +- * ext3_new_inode() setting the times, but error +- * recovery deletes the inode, so the worst that can +- * happen is that the times are slightly out of date +- * and/or different from the directory change time. 
+- */ +- dir->i_mtime = dir->i_ctime = CURRENT_TIME; +- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; +- dir->i_version = ++event; +- ext3_mark_inode_dirty(handle, dir); +- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); +- ext3_journal_dirty_metadata(handle, bh); +- brelse(bh); +- return 0; ++ dx_insert_block (frames + 0, hash2, newblock); ++ dxtrace(dx_show_index ("node", frames[1].entries)); ++ dxtrace(dx_show_index ("node", ++ ((struct dx_node *) bh2->b_data)->entries)); ++ err = ext3_journal_dirty_metadata(handle, bh2); ++ if (err) ++ goto journal_error; ++ brelse (bh2); ++ } else { ++ dxtrace(printk("Creating second level index...\n")); ++ memcpy((char *) entries2, (char *) entries, ++ icount * sizeof(struct dx_entry)); ++ dx_set_limit(entries2, dx_node_limit(dir)); ++ ++ /* Set up root */ ++ dx_set_count(entries, 1); ++ dx_set_block(entries + 0, newblock); ++ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; ++ ++ /* Add new access path frame */ ++ frame = frames + 1; ++ frame->at = at = at - entries + entries2; ++ frame->entries = entries = entries2; ++ frame->bh = bh2; ++ err = ext3_journal_get_write_access(handle, ++ frame->bh); ++ if (err) ++ goto journal_error; + } +- offset += le16_to_cpu(de->rec_len); +- de = (struct ext3_dir_entry_2 *) +- ((char *) de + le16_to_cpu(de->rec_len)); ++ ext3_journal_dirty_metadata(handle, frames[0].bh); + } +- brelse (bh); +- return -ENOSPC; ++ de = do_split(handle, dir, &bh, frame, &hinfo, &err); ++ if (!de) ++ goto cleanup; ++ err = add_dirent_to_buf(handle, dentry, inode, de, bh); ++ bh = 0; ++ goto cleanup; ++ ++journal_error: ++ ext3_std_error(dir->i_sb, err); ++cleanup: ++ if (bh) ++ brelse(bh); ++ dx_release(frames); ++ return err; + } ++#endif + + /* + * ext3_delete_entry deletes a directory entry by merging it with the +@@ -454,9 +1546,11 @@ + struct inode * inode; + int err; + +- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); +- if (IS_ERR(handle)) ++ handle = 
ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + ++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + if (IS_SYNC(dir)) + handle->h_sync = 1; +@@ -480,9 +1574,11 @@ + struct inode *inode; + int err; + +- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); +- if (IS_ERR(handle)) ++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + ++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + if (IS_SYNC(dir)) + handle->h_sync = 1; +@@ -508,9 +1604,11 @@ + if (dir->i_nlink >= EXT3_LINK_MAX) + return -EMLINK; + +- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); +- if (IS_ERR(handle)) ++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + ++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + if (IS_SYNC(dir)) + handle->h_sync = 1; +@@ -522,7 +1620,7 @@ + + inode->i_op = &ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; +- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; ++ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; + inode->i_blocks = 0; + dir_block = ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { +@@ -555,21 +1653,19 @@ + inode->i_mode |= S_ISGID; + ext3_mark_inode_dirty(handle, inode); + err = ext3_add_entry (handle, dentry, inode); +- if (err) +- goto out_no_entry; ++ if (err) { ++ inode->i_nlink = 0; ++ ext3_mark_inode_dirty(handle, inode); ++ iput (inode); ++ goto out_stop; ++ } + dir->i_nlink++; +- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; ++ ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + d_instantiate(dentry, inode); + out_stop: + ext3_journal_stop(handle, dir); + return err; +- +-out_no_entry: +- inode->i_nlink = 0; +- ext3_mark_inode_dirty(handle, inode); +- iput (inode); +- goto out_stop; + } + + /* +@@ -656,7 +1752,7 @@ + int err = 0, rc; + + lock_super(sb); +- if 
(!list_empty(&inode->u.ext3_i.i_orphan)) ++ if (!list_empty(&EXT3_I(inode)->i_orphan)) + goto out_unlock; + + /* Orphan handling is only valid for files with data blocks +@@ -697,7 +1793,7 @@ + * This is safe: on error we're going to ignore the orphan list + * anyway on the next recovery. */ + if (!err) +- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); ++ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + + jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); + jbd_debug(4, "orphan inode %ld will point to %d\n", +@@ -794,8 +1890,9 @@ + handle_t *handle; + + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); +- if (IS_ERR(handle)) ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + retval = -ENOENT; + bh = ext3_find_entry (dentry, &de); +@@ -833,7 +1930,7 @@ + dir->i_nlink--; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); +- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; ++ ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + + end_rmdir: +@@ -851,8 +1948,9 @@ + handle_t *handle; + + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); +- if (IS_ERR(handle)) ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + if (IS_SYNC(dir)) + handle->h_sync = 1; +@@ -879,7 +1977,7 @@ + if (retval) + goto end_unlink; + dir->i_ctime = dir->i_mtime = CURRENT_TIME; +- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; ++ ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + inode->i_nlink--; + if (!inode->i_nlink) +@@ -905,9 +2003,11 @@ + if (l > dir->i_sb->s_blocksize) + return -ENAMETOOLONG; + +- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); +- if (IS_ERR(handle)) ++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + ++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + if (IS_SYNC(dir)) + handle->h_sync = 1; +@@ -917,7 +2017,7 @@ + if (IS_ERR(inode)) + goto out_stop; + +- if (l > 
sizeof (inode->u.ext3_i.i_data)) { ++ if (l > sizeof (EXT3_I(inode)->i_data)) { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &ext3_aops; + /* +@@ -926,8 +2026,12 @@ + * i_size in generic_commit_write(). + */ + err = block_symlink(inode, symname, l); +- if (err) +- goto out_no_entry; ++ if (err) { ++ ext3_dec_count(handle, inode); ++ ext3_mark_inode_dirty(handle, inode); ++ iput (inode); ++ goto out_stop; ++ } + } else { + inode->i_op = &ext3_fast_symlink_inode_operations; + memcpy((char*)&inode->u.ext3_i.i_data,symname,l); +@@ -938,12 +2042,6 @@ + out_stop: + ext3_journal_stop(handle, dir); + return err; +- +-out_no_entry: +- ext3_dec_count(handle, inode); +- ext3_mark_inode_dirty(handle, inode); +- iput (inode); +- goto out_stop; + } + + static int ext3_link (struct dentry * old_dentry, +@@ -956,12 +2054,15 @@ + if (S_ISDIR(inode->i_mode)) + return -EPERM; + +- if (inode->i_nlink >= EXT3_LINK_MAX) ++ if (inode->i_nlink >= EXT3_LINK_MAX) { + return -EMLINK; ++ } + +- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); +- if (IS_ERR(handle)) ++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + ++ EXT3_INDEX_EXTRA_TRANS_BLOCKS); ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + if (IS_SYNC(dir)) + handle->h_sync = 1; +@@ -994,9 +2095,11 @@ + + old_bh = new_bh = dir_bh = NULL; + +- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); +- if (IS_ERR(handle)) ++ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + ++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); ++ if (IS_ERR(handle)) { + return PTR_ERR(handle); ++ } + + if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) + handle->h_sync = 1; +@@ -1069,14 +2172,33 @@ + /* + * ok, that's it + */ +- ext3_delete_entry(handle, old_dir, old_de, old_bh); ++ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh); ++ if (retval == -ENOENT) { ++ /* ++ * old_de could have moved out from under us. 
++ */ ++ struct buffer_head *old_bh2; ++ struct ext3_dir_entry_2 *old_de2; ++ ++ old_bh2 = ext3_find_entry(old_dentry, &old_de2); ++ if (old_bh2) { ++ retval = ext3_delete_entry(handle, old_dir, ++ old_de2, old_bh2); ++ brelse(old_bh2); ++ } ++ } ++ if (retval) { ++ ext3_warning(old_dir->i_sb, "ext3_rename", ++ "Deleting old file (%lu), %d, error=%d", ++ old_dir->i_ino, old_dir->i_nlink, retval); ++ } + + if (new_inode) { + new_inode->i_nlink--; + new_inode->i_ctime = CURRENT_TIME; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; +- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; ++ ext3_update_dx_flag(old_dir); + if (dir_bh) { + BUFFER_TRACE(dir_bh, "get_write_access"); + ext3_journal_get_write_access(handle, dir_bh); +@@ -1088,7 +2210,7 @@ + new_inode->i_nlink--; + } else { + new_dir->i_nlink++; +- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; ++ ext3_update_dx_flag(new_dir); + ext3_mark_inode_dirty(handle, new_dir); + } + } +Index: linux-2.4.21-suse/fs/ext3/super.c +=================================================================== +--- linux-2.4.21-suse.orig/fs/ext3/super.c 2003-10-29 22:39:14.000000000 +0300 ++++ linux-2.4.21-suse/fs/ext3/super.c 2003-10-29 23:17:20.000000000 +0300 +@@ -710,6 +710,7 @@ + es->s_mtime = cpu_to_le32(CURRENT_TIME); + ext3_update_dynamic_rev(sb); + EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); ++ + ext3_commit_super (sb, es, 1); + if (test_opt (sb, DEBUG)) + printk (KERN_INFO +@@ -720,6 +721,7 @@ + EXT3_BLOCKS_PER_GROUP(sb), + EXT3_INODES_PER_GROUP(sb), + sbi->s_mount_opt); ++ + printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", + bdevname(sb->s_dev)); + if (EXT3_SB(sb)->s_journal->j_inode == NULL) { +@@ -893,6 +895,7 @@ + return res; + } + ++ + struct super_block * ext3_read_super (struct super_block * sb, void * data, + int silent) + { +@@ -1069,6 +1072,9 @@ + sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); + 
sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); ++ for (i=0; i < 4; i++) ++ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); ++ sbi->s_def_hash_version = es->s_def_hash_version; + + if (sbi->s_blocks_per_group > blocksize * 8) { + printk (KERN_ERR +@@ -1770,6 +1776,7 @@ + unregister_filesystem(&ext3_fs_type); + } + ++EXPORT_SYMBOL(ext3_force_commit); + EXPORT_SYMBOL(ext3_bread); + + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); +Index: linux-2.4.21-suse/include/linux/ext3_fs.h +=================================================================== +--- linux-2.4.21-suse.orig/include/linux/ext3_fs.h 2003-06-14 02:28:25.000000000 +0400 ++++ linux-2.4.21-suse/include/linux/ext3_fs.h 2003-10-29 23:17:20.000000000 +0300 +@@ -40,6 +40,11 @@ + #define EXT3FS_VERSION "2.4-0.9.19" + + /* ++ * Always enable hashed directories ++ */ ++#define CONFIG_EXT3_INDEX ++ ++/* + * Debug code + */ + #ifdef EXT3FS_DEBUG +@@ -438,8 +443,11 @@ + /*E0*/ __u32 s_journal_inum; /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ +- +-/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ ++ __u32 s_hash_seed[4]; /* HTREE hash seed */ ++ __u8 s_def_hash_version; /* Default hash version to use */ ++ __u8 s_reserved_char_pad; ++ __u16 s_reserved_word_pad; ++ __u32 s_reserved[192]; /* Padding to the end of the block */ + }; + + #ifdef __KERNEL__ +@@ -576,9 +584,46 @@ + #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) + #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ + ~EXT3_DIR_ROUND) ++/* ++ * Hash Tree Directory indexing ++ * (c) Daniel Phillips, 2001 ++ */ ++ ++#ifdef CONFIG_EXT3_INDEX ++ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ ++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ ++ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) ++#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink 
>= EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) ++#else ++ #define is_dx(dir) 0 ++#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) ++#endif ++ ++/* Legal values for the dx_root hash_version field: */ ++ ++#define DX_HASH_LEGACY 0 ++#define DX_HASH_HALF_MD4 1 ++#define DX_HASH_TEA 2 ++ ++/* hash info structure used by the directory hash */ ++struct dx_hash_info ++{ ++ u32 hash; ++ u32 minor_hash; ++ int hash_version; ++ u32 *seed; ++}; + + #ifdef __KERNEL__ + /* ++ * Control parameters used by ext3_htree_next_block ++ */ ++#define HASH_NB_ALWAYS 1 ++ ++ ++/* + * Describe an inode's exact location on disk and in memory + */ + struct ext3_iloc +@@ -588,6 +633,27 @@ + unsigned long block_group; + }; + ++ ++/* ++ * This structure is stuffed into the struct file's private_data field ++ * for directories. It is where we put information so that we can do ++ * readdir operations in hash tree order. ++ */ ++struct dir_private_info { ++ rb_root_t root; ++ rb_node_t *curr_node; ++ struct fname *extra_fname; ++ loff_t last_pos; ++ __u32 curr_hash; ++ __u32 curr_minor_hash; ++ __u32 next_hash; ++}; ++ ++/* ++ * Special error return code only used by dx_probe() and its callers. 
++ */ ++#define ERR_BAD_DX_DIR -75000 ++ + /* + * Function prototypes + */ +@@ -615,11 +681,20 @@ + + /* dir.c */ + extern int ext3_check_dir_entry(const char *, struct inode *, +- struct ext3_dir_entry_2 *, struct buffer_head *, +- unsigned long); ++ struct ext3_dir_entry_2 *, ++ struct buffer_head *, unsigned long); ++extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, ++ __u32 minor_hash, ++ struct ext3_dir_entry_2 *dirent); ++extern void ext3_htree_free_dir_info(struct dir_private_info *p); ++ + /* fsync.c */ + extern int ext3_sync_file (struct file *, struct dentry *, int); + ++/* hash.c */ ++extern int ext3fs_dirhash(const char *name, int len, struct ++ dx_hash_info *hinfo); ++ + /* ialloc.c */ + extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); + extern void ext3_free_inode (handle_t *, struct inode *); +@@ -652,6 +727,8 @@ + /* namei.c */ + extern int ext3_orphan_add(handle_t *, struct inode *); + extern int ext3_orphan_del(handle_t *, struct inode *); ++extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, ++ __u32 start_minor_hash, __u32 *next_hash); + + /* super.c */ + extern void ext3_error (struct super_block *, const char *, const char *, ...) 
+Index: linux-2.4.21-suse/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.4.21-suse.orig/include/linux/ext3_fs_sb.h 2003-06-14 02:26:52.000000000 +0400 ++++ linux-2.4.21-suse/include/linux/ext3_fs_sb.h 2003-10-29 23:17:20.000000000 +0300 +@@ -62,6 +62,8 @@ + int s_inode_size; + int s_first_ino; + u32 s_next_generation; ++ u32 s_hash_seed[4]; ++ int s_def_hash_version; + + /* Journaling */ + struct inode * s_journal_inode; +Index: linux-2.4.21-suse/include/linux/ext3_jbd.h +=================================================================== +--- linux-2.4.21-suse.orig/include/linux/ext3_jbd.h 2003-06-14 02:28:25.000000000 +0400 ++++ linux-2.4.21-suse/include/linux/ext3_jbd.h 2003-10-29 23:17:20.000000000 +0300 +@@ -63,6 +63,8 @@ + + #define EXT3_RESERVE_TRANS_BLOCKS 12U + ++#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 ++ + int + ext3_mark_iloc_dirty(handle_t *handle, + struct inode *inode, +Index: linux-2.4.21-suse/include/linux/rbtree.h +=================================================================== +--- linux-2.4.21-suse.orig/include/linux/rbtree.h 2003-06-14 02:26:51.000000000 +0400 ++++ linux-2.4.21-suse/include/linux/rbtree.h 2003-10-29 23:17:20.000000000 +0300 +@@ -120,6 +120,8 @@ + + extern void rb_insert_color(rb_node_t *, rb_root_t *); + extern void rb_erase(rb_node_t *, rb_root_t *); ++extern rb_node_t *rb_get_first(rb_root_t *root); ++extern rb_node_t *rb_get_next(rb_node_t *n); + + static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link) + { +Index: linux-2.4.21-suse/lib/rbtree.c +=================================================================== +--- linux-2.4.21-suse.orig/lib/rbtree.c 2002-08-03 04:39:46.000000000 +0400 ++++ linux-2.4.21-suse/lib/rbtree.c 2003-10-29 23:17:20.000000000 +0300 +@@ -17,6 +17,8 @@ + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c ++ ++ rb_get_first and rb_get_next written by 
Theodore Ts'o, 9/8/2002 + */ + + #include +@@ -294,3 +296,43 @@ + __rb_erase_color(child, parent, root); + } + EXPORT_SYMBOL(rb_erase); ++ ++/* ++ * This function returns the first node (in sort order) of the tree. ++ */ ++rb_node_t *rb_get_first(rb_root_t *root) ++{ ++ rb_node_t *n; ++ ++ n = root->rb_node; ++ if (!n) ++ return 0; ++ while (n->rb_left) ++ n = n->rb_left; ++ return n; ++} ++EXPORT_SYMBOL(rb_get_first); ++ ++/* ++ * Given a node, this function will return the next node in the tree. ++ */ ++rb_node_t *rb_get_next(rb_node_t *n) ++{ ++ rb_node_t *parent; ++ ++ if (n->rb_right) { ++ n = n->rb_right; ++ while (n->rb_left) ++ n = n->rb_left; ++ return n; ++ } else { ++ while ((parent = n->rb_parent)) { ++ if (n == parent->rb_left) ++ return parent; ++ n = parent; ++ } ++ return 0; ++ } ++} ++EXPORT_SYMBOL(rb_get_next); ++ diff --git a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch new file mode 100644 index 0000000..85e12b8 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch @@ -0,0 +1,350 @@ +Index: linux-2.4.18-chaos/fs/ext3/ialloc.c +=================================================================== +--- linux-2.4.18-chaos.orig/fs/ext3/ialloc.c 2003-10-22 14:23:53.000000000 +0400 ++++ linux-2.4.18-chaos/fs/ext3/ialloc.c 2003-10-29 20:42:04.000000000 +0300 +@@ -241,11 +241,16 @@ + + bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; + +- BUFFER_TRACE(bh, "get_write_access"); +- fatal = ext3_journal_get_write_access(handle, bh); ++ BUFFER_TRACE(bh, "get_undo_access"); ++ fatal = ext3_journal_get_undo_access(handle, bh); + if (fatal) + goto error_return; + ++ /* to prevent inode reusing within single transaction -bzzz */ ++ BUFFER_TRACE(bh, "clear in b_committed_data"); ++ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data != NULL); ++ ext3_set_bit(bit, bh2jh(bh)->b_committed_data); ++ + /* Ok, now we can actually update the inode bitmaps.. 
*/ + if (!ext3_clear_bit (bit, bh->b_data)) + ext3_error (sb, "ext3_free_inode", +@@ -319,6 +324,131 @@ + return 0; + } + ++static int ext3_test_allocatable(int nr, struct buffer_head *bh) ++{ ++ if (ext3_test_bit(nr, bh->b_data)) ++ return 0; ++ if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data) ++ return 1; ++#if 0 ++ if (!ext3_test_bit(nr, bh2jh(bh)->b_committed_data)) ++ printk("EXT3-fs: inode %d was used\n", nr); ++#endif ++ return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data); ++} ++ ++int ext3_find_group_dir(const struct inode *dir, ++ struct ext3_group_desc **gdp, ++ struct buffer_head **bh) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct ext3_super_block *es; ++ struct ext3_group_desc *tmp; ++ int i = 0, j, avefreei; ++ ++ es = EXT3_SB(sb)->s_es; ++ avefreei = le32_to_cpu(es->s_free_inodes_count) / ++ EXT3_SB(sb)->s_groups_count; ++ for (j = 0; j < EXT3_SB(sb)->s_groups_count; j++) { ++ struct buffer_head *temp_buffer; ++ tmp = ext3_get_group_desc(sb, j, &temp_buffer); ++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count) && ++ le16_to_cpu(tmp->bg_free_inodes_count) >= avefreei) { ++ if (!*gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > ++ le16_to_cpu((*gdp)->bg_free_blocks_count))) { ++ i = j; ++ *gdp = tmp; ++ *bh = temp_buffer; ++ } ++ } ++ } ++ ++ return i; ++} ++ ++int ext3_find_group_other(const struct inode *dir, ++ struct ext3_group_desc **gdp, ++ struct buffer_head **bh) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct ext3_group_desc *tmp; ++ int i, j; ++ ++ /* ++ * Try to place the inode in its parent directory ++ */ ++ i = EXT3_I(dir)->i_block_group; ++ tmp = ext3_get_group_desc(sb, i, bh); ++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) ++ *gdp = tmp; ++ else { ++ /* ++ * Use a quadratic hash to find a group with a ++ * free inode ++ */ ++ for (j = 1; j < EXT3_SB(sb)->s_groups_count; j <<= 1) { ++ i += j; ++ if (i >= EXT3_SB(sb)->s_groups_count) ++ i -= EXT3_SB(sb)->s_groups_count; ++ tmp = ext3_get_group_desc (sb, i, bh); 
++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { ++ *gdp = tmp; ++ break; ++ } ++ } ++ } ++ if (!*gdp) { ++ /* ++ * That failed: try linear search for a free inode ++ */ ++ i = EXT3_I(dir)->i_block_group + 1; ++ for (j = 2; j < EXT3_SB(sb)->s_groups_count; j++) { ++ if (++i >= EXT3_SB(sb)->s_groups_count) ++ i = 0; ++ tmp = ext3_get_group_desc (sb, i, bh); ++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { ++ *gdp = tmp; ++ break; ++ } ++ } ++ } ++ ++ return i; ++} ++ ++static int ext3_find_group(const struct inode *dir, int mode, ++ struct ext3_group_desc **gdp, ++ struct buffer_head **bh) ++{ ++ if (S_ISDIR(mode)) ++ return ext3_find_group_dir(dir, gdp, bh); ++ return ext3_find_group_other(dir, gdp, bh); ++} ++ ++static int ext3_find_usable_inode(struct super_block *sb, ++ struct buffer_head *bh) ++{ ++ int here, maxinodes, next; ++ ++ maxinodes = EXT3_INODES_PER_GROUP(sb); ++ here = 0; ++ ++ while (here < maxinodes) { ++ next = ext3_find_next_zero_bit((unsigned long *) bh->b_data, ++ maxinodes, here); ++ if (next >= maxinodes) ++ return -1; ++ if (ext3_test_allocatable(next, bh)) ++ return next; ++ ++ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data); ++ here = ext3_find_next_zero_bit ++ ((unsigned long *) bh2jh(bh)->b_committed_data, ++ maxinodes, next); ++ } ++ return -1; ++} ++ + /* + * There are two policies for allocating an inode. 
If the new inode is + * a directory, then a forward search is made for a block group with both +@@ -337,7 +467,7 @@ + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; +- int i, j, avefreei; ++ int i, j, k; + struct inode * inode; + int bitmap_nr; + struct ext3_inode_info *ei; +@@ -376,11 +506,12 @@ + + bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; + +- BUFFER_TRACE(bh, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh); ++ BUFFER_TRACE(bh, "get_undo_access"); ++ err = ext3_journal_get_undo_access(handle, bh); + if (err) goto fail; + +- if (ext3_set_bit(j, bh->b_data)) { ++ if (!ext3_test_allocatable(j, bh) || ++ ext3_set_bit(j, bh->b_data)) { + printk(KERN_ERR "goal inode %lu unavailable\n", goal); + /* Oh well, we tried. */ + goto repeat; +@@ -398,119 +529,68 @@ + + repeat: + gdp = NULL; +- i = 0; +- +- if (S_ISDIR(mode)) { +- avefreei = le32_to_cpu(es->s_free_inodes_count) / +- sbi->s_groups_count; +- if (!gdp) { +- for (j = 0; j < sbi->s_groups_count; j++) { +- struct buffer_head *temp_buffer; +- tmp = ext3_get_group_desc (sb, j, &temp_buffer); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count) && +- le16_to_cpu(tmp->bg_free_inodes_count) >= +- avefreei) { +- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > +- le16_to_cpu(gdp->bg_free_blocks_count))) { +- i = j; +- gdp = tmp; +- bh2 = temp_buffer; +- } +- } +- } +- } +- } else { +- /* +- * Try to place the inode in its parent directory +- */ +- i = EXT3_I(dir)->i_block_group; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) +- gdp = tmp; +- else +- { +- /* +- * Use a quadratic hash to find a group with a +- * free inode +- */ +- for (j = 1; j < sbi->s_groups_count; j <<= 1) { +- i += j; +- if (i >= sbi->s_groups_count) +- i -= sbi->s_groups_count; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- if (!gdp) { +- 
/* +- * That failed: try linear search for a free inode +- */ +- i = EXT3_I(dir)->i_block_group + 1; +- for (j = 2; j < sbi->s_groups_count; j++) { +- if (++i >= sbi->s_groups_count) +- i = 0; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- } + ++ /* choose group */ ++ i = ext3_find_group(dir, mode, &gdp, &bh2); + err = -ENOSPC; + if (!gdp) + goto out; +- ++ + err = -EIO; +- bitmap_nr = load_inode_bitmap (sb, i); ++ bitmap_nr = load_inode_bitmap(sb, i); + if (bitmap_nr < 0) + goto fail; +- + bh = sbi->s_inode_bitmap[bitmap_nr]; + +- if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, +- sbi->s_inodes_per_group)) < +- sbi->s_inodes_per_group) { +- BUFFER_TRACE(bh, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh); +- if (err) goto fail; +- +- if (ext3_set_bit (j, bh->b_data)) { +- ext3_error (sb, "ext3_new_inode", +- "bit already set for inode %d", j); +- goto repeat; +- } +- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); +- err = ext3_journal_dirty_metadata(handle, bh); +- if (err) goto fail; +- } else { +- if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) { +- ext3_error (sb, "ext3_new_inode", +- "Free inodes count corrupted in group %d", +- i); +- /* Is it really ENOSPC? 
*/ +- err = -ENOSPC; +- if (sb->s_flags & MS_RDONLY) +- goto fail; +- +- BUFFER_TRACE(bh2, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh2); +- if (err) goto fail; +- gdp->bg_free_inodes_count = 0; +- BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); +- err = ext3_journal_dirty_metadata(handle, bh2); +- if (err) goto fail; ++ /* try to allocate in selected group */ ++ if ((j = ext3_find_usable_inode(sb, bh)) >= 0) ++ goto find_free; ++ ++ /* can't allocate: try to allocate in ANY another groups */ ++ k = i; ++ err = -EIO; ++ for (i = i + 1; i != k; i++) { ++ if (i >= sbi->s_groups_count) ++ i = 0; ++ tmp = ext3_get_group_desc(sb, i, &bh2); ++ if (le16_to_cpu(tmp->bg_free_inodes_count) == 0) ++ continue; ++ ++ bitmap_nr = load_inode_bitmap(sb, i); ++ if (bitmap_nr < 0) ++ goto fail; ++ bh = sbi->s_inode_bitmap[bitmap_nr]; ++ ++ /* try to allocate in selected group */ ++ if ((j = ext3_find_usable_inode(sb, bh)) >= 0) { ++ gdp = tmp; ++ break; + } +- goto repeat; + } ++ err = -ENOSPC; ++ if (!gdp) ++ goto out; ++ ++ find_free: ++ BUFFER_TRACE(bh, "get_undo_access"); ++ err = ext3_journal_get_undo_access(handle, bh); ++ if (err) ++ goto fail; ++ ++ if (ext3_set_bit(j, bh->b_data)) { ++ ext3_error (sb, "ext3_new_inode", ++ "bit already set for inode %d", j); ++ goto fail; ++ } ++ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); ++ err = ext3_journal_dirty_metadata(handle, bh); ++ if (err) ++ goto fail; ++ + have_bit_and_group: ++ if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data) ++ J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data)); ++ + j += i * EXT3_INODES_PER_GROUP(sb) + 1; + if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { + ext3_error (sb, "ext3_new_inode", diff --git a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.20.patch b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.20.patch new file mode 100644 index 0000000..18c69ff --- /dev/null +++ 
b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.20.patch @@ -0,0 +1,352 @@ +Index: linux-2.4.20/fs/ext3/ialloc.c +=================================================================== +--- linux-2.4.20.orig/fs/ext3/ialloc.c 2003-10-25 00:37:13.000000000 +0400 ++++ linux-2.4.20/fs/ext3/ialloc.c 2003-10-29 20:33:33.000000000 +0300 +@@ -241,11 +241,16 @@ + + bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; + +- BUFFER_TRACE(bh, "get_write_access"); +- fatal = ext3_journal_get_write_access(handle, bh); ++ BUFFER_TRACE(bh, "get_undo_access"); ++ fatal = ext3_journal_get_undo_access(handle, bh); + if (fatal) + goto error_return; + ++ /* to prevent inode reusing within single transaction -bzzz */ ++ BUFFER_TRACE(bh, "clear in b_committed_data"); ++ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data != NULL); ++ ext3_set_bit(bit, bh2jh(bh)->b_committed_data); ++ + /* Ok, now we can actually update the inode bitmaps.. */ + if (!ext3_clear_bit (bit, bh->b_data)) + ext3_error (sb, "ext3_free_inode", +@@ -319,6 +324,131 @@ + return 0; + } + ++static int ext3_test_allocatable(int nr, struct buffer_head *bh) ++{ ++ if (ext3_test_bit(nr, bh->b_data)) ++ return 0; ++ if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data) ++ return 1; ++#if 0 ++ if (!ext3_test_bit(nr, bh2jh(bh)->b_committed_data)) ++ printk("EXT3-fs: inode %d was used\n", nr); ++#endif ++ return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data); ++} ++ ++int ext3_find_group_dir(const struct inode *dir, ++ struct ext3_group_desc **gdp, ++ struct buffer_head **bh) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct ext3_super_block *es; ++ struct ext3_group_desc *tmp; ++ int i = 0, j, avefreei; ++ ++ es = EXT3_SB(sb)->s_es; ++ avefreei = le32_to_cpu(es->s_free_inodes_count) / ++ sb->u.ext3_sb.s_groups_count; ++ for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { ++ struct buffer_head *temp_buffer; ++ tmp = ext3_get_group_desc(sb, j, &temp_buffer); ++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count) && ++ 
le16_to_cpu(tmp->bg_free_inodes_count) >= avefreei) { ++ if (!*gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > ++ le16_to_cpu((*gdp)->bg_free_blocks_count))) { ++ i = j; ++ *gdp = tmp; ++ *bh = temp_buffer; ++ } ++ } ++ } ++ ++ return i; ++} ++ ++int ext3_find_group_other(const struct inode *dir, ++ struct ext3_group_desc **gdp, ++ struct buffer_head **bh) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct ext3_group_desc *tmp; ++ int i, j; ++ ++ /* ++ * Try to place the inode in its parent directory ++ */ ++ i = dir->u.ext3_i.i_block_group; ++ tmp = ext3_get_group_desc(sb, i, bh); ++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) ++ *gdp = tmp; ++ else { ++ /* ++ * Use a quadratic hash to find a group with a ++ * free inode ++ */ ++ for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { ++ i += j; ++ if (i >= sb->u.ext3_sb.s_groups_count) ++ i -= sb->u.ext3_sb.s_groups_count; ++ tmp = ext3_get_group_desc (sb, i, bh); ++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { ++ *gdp = tmp; ++ break; ++ } ++ } ++ } ++ if (!*gdp) { ++ /* ++ * That failed: try linear search for a free inode ++ */ ++ i = dir->u.ext3_i.i_block_group + 1; ++ for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { ++ if (++i >= sb->u.ext3_sb.s_groups_count) ++ i = 0; ++ tmp = ext3_get_group_desc (sb, i, bh); ++ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { ++ *gdp = tmp; ++ break; ++ } ++ } ++ } ++ ++ return i; ++} ++ ++static int ext3_find_group(const struct inode *dir, int mode, ++ struct ext3_group_desc **gdp, ++ struct buffer_head **bh) ++{ ++ if (S_ISDIR(mode)) ++ return ext3_find_group_dir(dir, gdp, bh); ++ return ext3_find_group_other(dir, gdp, bh); ++} ++ ++static int ext3_find_usable_inode(struct super_block *sb, ++ struct buffer_head *bh) ++{ ++ int here, maxinodes, next; ++ ++ maxinodes = EXT3_INODES_PER_GROUP(sb); ++ here = 0; ++ ++ while (here < maxinodes) { ++ next = ext3_find_next_zero_bit((unsigned long *) bh->b_data, ++ maxinodes, here); ++ if (next >= 
maxinodes) ++ return -1; ++ if (ext3_test_allocatable(next, bh)) ++ return next; ++ ++ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data); ++ here = ext3_find_next_zero_bit ++ ((unsigned long *) bh2jh(bh)->b_committed_data, ++ maxinodes, next); ++ } ++ return -1; ++} ++ + /* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both +@@ -336,7 +466,7 @@ + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; +- int i, j, avefreei; ++ int i, j, k; + struct inode * inode; + int bitmap_nr; + struct ext3_group_desc * gdp; +@@ -371,11 +501,12 @@ + + bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; + +- BUFFER_TRACE(bh, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh); ++ BUFFER_TRACE(bh, "get_undo_access"); ++ err = ext3_journal_get_undo_access(handle, bh); + if (err) goto fail; + +- if (ext3_set_bit(j, bh->b_data)) { ++ if (!ext3_test_allocatable(j, bh) || ++ ext3_set_bit(j, bh->b_data)) { + printk(KERN_ERR "goal inode %lu unavailable\n", goal); + /* Oh well, we tried. 
*/ + goto repeat; +@@ -393,119 +524,70 @@ + + repeat: + gdp = NULL; +- i = 0; +- +- if (S_ISDIR(mode)) { +- avefreei = le32_to_cpu(es->s_free_inodes_count) / +- sb->u.ext3_sb.s_groups_count; +- if (!gdp) { +- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { +- struct buffer_head *temp_buffer; +- tmp = ext3_get_group_desc (sb, j, &temp_buffer); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count) && +- le16_to_cpu(tmp->bg_free_inodes_count) >= +- avefreei) { +- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > +- le16_to_cpu(gdp->bg_free_blocks_count))) { +- i = j; +- gdp = tmp; +- bh2 = temp_buffer; +- } +- } +- } +- } +- } else { +- /* +- * Try to place the inode in its parent directory +- */ +- i = dir->u.ext3_i.i_block_group; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) +- gdp = tmp; +- else +- { +- /* +- * Use a quadratic hash to find a group with a +- * free inode +- */ +- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { +- i += j; +- if (i >= sb->u.ext3_sb.s_groups_count) +- i -= sb->u.ext3_sb.s_groups_count; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- if (!gdp) { +- /* +- * That failed: try linear search for a free inode +- */ +- i = dir->u.ext3_i.i_block_group + 1; +- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { +- if (++i >= sb->u.ext3_sb.s_groups_count) +- i = 0; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- } + ++ /* choose group */ ++ i = ext3_find_group(dir, mode, &gdp, &bh2); + err = -ENOSPC; + if (!gdp) + goto out; +- ++ + err = -EIO; +- bitmap_nr = load_inode_bitmap (sb, i); ++ bitmap_nr = load_inode_bitmap(sb, i); + if (bitmap_nr < 0) + goto fail; +- + bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; + +- if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, +- 
EXT3_INODES_PER_GROUP(sb))) < +- EXT3_INODES_PER_GROUP(sb)) { +- BUFFER_TRACE(bh, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh); +- if (err) goto fail; +- +- if (ext3_set_bit (j, bh->b_data)) { +- ext3_error (sb, "ext3_new_inode", +- "bit already set for inode %d", j); +- goto repeat; +- } +- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); +- err = ext3_journal_dirty_metadata(handle, bh); +- if (err) goto fail; +- } else { +- if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) { +- ext3_error (sb, "ext3_new_inode", +- "Free inodes count corrupted in group %d", +- i); +- /* Is it really ENOSPC? */ +- err = -ENOSPC; +- if (sb->s_flags & MS_RDONLY) +- goto fail; +- +- BUFFER_TRACE(bh2, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh2); +- if (err) goto fail; +- gdp->bg_free_inodes_count = 0; +- BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); +- err = ext3_journal_dirty_metadata(handle, bh2); +- if (err) goto fail; ++ /* try to allocate in selected group */ ++ j = ext3_find_usable_inode(sb, bh); ++ err = -ENOSPC; ++ if (j >= 0) ++ goto found_free; ++ ++ /* can't allocate: try to allocate in ANY another groups */ ++ k = i; ++ err = -EIO; ++ /* wrap inside the step so the scan always comes back round to k */ ++ for (i = (i + 1) % sb->u.ext3_sb.s_groups_count; i != k; ++ i = (i + 1) % sb->u.ext3_sb.s_groups_count) { ++ tmp = ext3_get_group_desc(sb, i, &bh2); ++ if (!tmp || le16_to_cpu(tmp->bg_free_inodes_count) == 0) ++ continue; ++ ++ bitmap_nr = load_inode_bitmap(sb, i); ++ if (bitmap_nr < 0) ++ goto fail; ++ bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; ++ ++ /* try to allocate in selected group */ ++ if ((j = ext3_find_usable_inode(sb, bh)) >= 0) { ++ gdp = tmp; ++ break; + } +- goto repeat; + } ++ err = -ENOSPC; ++ if (j < 0) ++ goto out; ++ ++ found_free: ++ BUFFER_TRACE(bh, "get_undo_access"); ++ err = ext3_journal_get_undo_access(handle, bh); ++ if (err) ++ goto fail; ++ ++ if (ext3_set_bit(j, bh->b_data)) { ++ ext3_error (sb, "ext3_new_inode", ++ "bit already set for inode %d", j); ++ goto fail; ++ } ++
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); ++ err = ext3_journal_dirty_metadata(handle, bh); ++ if (err) ++ goto fail; ++ + have_bit_and_group: ++ if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data) ++ J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data)); ++ + j += i * EXT3_INODES_PER_GROUP(sb) + 1; + if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { + ext3_error (sb, "ext3_new_inode", diff --git a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.22.patch b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.22.patch new file mode 100644 index 0000000..2bbebbf --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.22.patch @@ -0,0 +1,187 @@ +Index: linux-2.4.22-ac1/fs/ext3/ialloc.c +=================================================================== +--- linux-2.4.22-ac1.orig/fs/ext3/ialloc.c 2003-10-22 14:20:03.000000000 +0400 ++++ linux-2.4.22-ac1/fs/ext3/ialloc.c 2003-10-29 22:32:28.000000000 +0300 +@@ -242,11 +242,16 @@ + + bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; + +- BUFFER_TRACE(bh, "get_write_access"); +- fatal = ext3_journal_get_write_access(handle, bh); ++ BUFFER_TRACE(bh, "get_undo_access"); ++ fatal = ext3_journal_get_undo_access(handle, bh); + if (fatal) + goto error_return; + ++ /* to prevent inode reusing within single transaction -bzzz */ ++ BUFFER_TRACE(bh, "clear in b_committed_data"); ++ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data != NULL); ++ ext3_set_bit(bit, bh2jh(bh)->b_committed_data); ++ + /* Ok, now we can actually update the inode bitmaps.. 
*/ + if (!ext3_clear_bit (bit, bh->b_data)) + ext3_error (sb, "ext3_free_inode", +@@ -320,6 +325,43 @@ + return 0; + } + ++static int ext3_test_allocatable(int nr, struct buffer_head *bh) ++{ ++ if (ext3_test_bit(nr, bh->b_data)) ++ return 0; ++ if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data) ++ return 1; ++#if 0 ++ if (!ext3_test_bit(nr, bh2jh(bh)->b_committed_data)) ++ printk("EXT3-fs: inode %d was used\n", nr); ++#endif ++ return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data); ++} ++ ++static int ext3_find_usable_inode(struct super_block *sb, ++ struct buffer_head *bh) ++{ ++ int here, maxinodes, next; ++ ++ maxinodes = EXT3_INODES_PER_GROUP(sb); ++ here = 0; ++ ++ while (here < maxinodes) { ++ next = ext3_find_next_zero_bit((unsigned long *) bh->b_data, ++ maxinodes, here); ++ if (next >= maxinodes) ++ return -1; ++ if (ext3_test_allocatable(next, bh)) ++ return next; ++ ++ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data); ++ here = ext3_find_next_zero_bit ++ ((unsigned long *) bh2jh(bh)->b_committed_data, ++ maxinodes, next); ++ } ++ return -1; ++} ++ + /* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both +@@ -530,7 +572,7 @@ + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; +- int group; ++ int group, k; + ino_t ino; + struct inode * inode; + int bitmap_nr; +@@ -569,7 +611,8 @@ + err = ext3_journal_get_write_access(handle, bh); + if (err) goto fail; + +- if (ext3_set_bit(ino, bh->b_data)) { ++ if (!ext3_test_allocatable(ino, bh) || ++ ext3_set_bit(ino, bh->b_data)) { + printk(KERN_ERR "goal inode %lu unavailable\n", goal); + /* Oh well, we tried. 
*/ + goto repeat; +@@ -595,52 +638,63 @@ + group = find_group_other(sb, dir); + + err = -ENOSPC; +- if (!group == -1) ++ if (group == -1) + goto out; + + err = -EIO; +- bitmap_nr = load_inode_bitmap (sb, group); ++ bitmap_nr = load_inode_bitmap(sb, group); + if (bitmap_nr < 0) + goto fail; +- + bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; +- gdp = ext3_get_group_desc (sb, group, &bh2); + +- if ((ino = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, +- EXT3_INODES_PER_GROUP(sb))) < +- EXT3_INODES_PER_GROUP(sb)) { +- BUFFER_TRACE(bh, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh); +- if (err) goto fail; +- +- if (ext3_set_bit (ino, bh->b_data)) { +- ext3_error (sb, "ext3_new_inode", +- "bit already set for inode %lu", ino); +- goto repeat; +- } +- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); +- err = ext3_journal_dirty_metadata(handle, bh); +- if (err) goto fail; +- } else { +- if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) { +- ext3_error (sb, "ext3_new_inode", +- "Free inodes count corrupted in group %d", +- group); +- /* Is it really ENOSPC? 
*/ +- err = -ENOSPC; +- if (sb->s_flags & MS_RDONLY) +- goto fail; +- +- BUFFER_TRACE(bh2, "get_write_access"); +- err = ext3_journal_get_write_access(handle, bh2); +- if (err) goto fail; +- gdp->bg_free_inodes_count = 0; +- BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); +- err = ext3_journal_dirty_metadata(handle, bh2); +- if (err) goto fail; ++ /* try to allocate in selected group */ ++ if ((int)(ino = ext3_find_usable_inode(sb, bh)) >= 0) ++ goto find_free; ++ ++ /* can't allocate: try to allocate in ANY another groups */ ++ k = group; ++ gdp = NULL; ++ err = -EIO; ++ /* wrap inside the step so the scan always comes back round to k */ ++ for (group = (group + 1) % sb->u.ext3_sb.s_groups_count; group != k; ++ group = (group + 1) % sb->u.ext3_sb.s_groups_count) { ++ struct ext3_group_desc *tmp; ++ ++ tmp = ext3_get_group_desc(sb, group, &bh2); ++ if (!tmp || le16_to_cpu(tmp->bg_free_inodes_count) == 0) ++ continue; ++ ++ bitmap_nr = load_inode_bitmap(sb, group); ++ if (bitmap_nr < 0) ++ goto fail; ++ bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; ++ ++ /* try to allocate in selected group */ ++ if ((int)(ino = ext3_find_usable_inode(sb, bh)) >= 0) { ++ gdp = tmp; ++ break; + } +- goto repeat; + } ++ err = -ENOSPC; ++ if (!gdp) ++ goto out; ++ ++find_free: ++ BUFFER_TRACE(bh, "get_undo_access"); ++ err = ext3_journal_get_undo_access(handle, bh); ++ if (err) ++ goto fail; ++ ++ if (ext3_set_bit(ino, bh->b_data)) { ++ ext3_error (sb, "ext3_new_inode", ++ "bit already set for inode %lu", ++ (unsigned long) ino); ++ goto fail; ++ } ++ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); ++ err = ext3_journal_dirty_metadata(handle, bh); ++ if (err) ++ goto fail; + + have_bit_and_group: + ino += group * EXT3_INODES_PER_GROUP(sb) + 1; diff --git a/lustre/kernel_patches/patches/ext3-orphan_lock-suse.patch b/lustre/kernel_patches/patches/ext3-orphan_lock-suse.patch new file mode 100644 index 0000000..c3369e6 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-orphan_lock-suse.patch @@ -0,0 +1,81 @@ +Index: linux-2.4.21-suse/fs/ext3/namei.c
+=================================================================== +--- linux-2.4.21-suse.orig/fs/ext3/namei.c 2003-10-30 02:17:22.000000000 +0300 ++++ linux-2.4.21-suse/fs/ext3/namei.c 2003-10-30 02:20:53.000000000 +0300 +@@ -1747,8 +1747,8 @@ + struct super_block *sb = inode->i_sb; + struct ext3_iloc iloc; + int err = 0, rc; +- +- lock_super(sb); ++ ++ down(&EXT3_SB(sb)->s_orphan_lock); + if (!list_empty(&EXT3_I(inode)->i_orphan)) + goto out_unlock; + +@@ -1796,7 +1796,7 @@ + jbd_debug(4, "orphan inode %ld will point to %d\n", + inode->i_ino, NEXT_ORPHAN(inode)); + out_unlock: +- unlock_super(sb); ++ up(&EXT3_SB(sb)->s_orphan_lock); + ext3_std_error(inode->i_sb, err); + return err; + } +@@ -1808,20 +1808,19 @@ + int ext3_orphan_del(handle_t *handle, struct inode *inode) + { + struct list_head *prev; +- struct ext3_sb_info *sbi; ++ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); + unsigned long ino_next; + struct ext3_iloc iloc; + int err = 0; + +- lock_super(inode->i_sb); ++ down(&sbi->s_orphan_lock); + if (list_empty(&inode->u.ext3_i.i_orphan)) { +- unlock_super(inode->i_sb); ++ up(&sbi->s_orphan_lock); + return 0; + } + + ino_next = NEXT_ORPHAN(inode); + prev = inode->u.ext3_i.i_orphan.prev; +- sbi = EXT3_SB(inode->i_sb); + + jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); + +@@ -1870,7 +1869,7 @@ + out_err: + ext3_std_error(inode->i_sb, err); + out: +- unlock_super(inode->i_sb); ++ up(&sbi->s_orphan_lock); + return err; + + out_brelse: +Index: linux-2.4.21-suse/fs/ext3/super.c +=================================================================== +--- linux-2.4.21-suse.orig/fs/ext3/super.c 2003-10-30 02:17:22.000000000 +0300 ++++ linux-2.4.21-suse/fs/ext3/super.c 2003-10-30 02:17:22.000000000 +0300 +@@ -1151,6 +1151,7 @@ + */ + sb->s_op = &ext3_sops; + INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ ++ sema_init(&sbi->s_orphan_lock, 1); + + sb->s_root = 0; + +Index: linux-2.4.21-suse/include/linux/ext3_fs_sb.h 
+=================================================================== +--- linux-2.4.21-suse.orig/include/linux/ext3_fs_sb.h 2003-10-30 02:17:22.000000000 +0300 ++++ linux-2.4.21-suse/include/linux/ext3_fs_sb.h 2003-10-30 02:17:22.000000000 +0300 +@@ -69,6 +69,7 @@ + struct inode * s_journal_inode; + struct journal_s * s_journal; + struct list_head s_orphan; ++ struct semaphore s_orphan_lock; + unsigned long s_commit_interval; + struct block_device *journal_bdev; + #ifdef CONFIG_JBD_DEBUG diff --git a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch new file mode 100644 index 0000000..9318fc1 --- /dev/null +++ b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch @@ -0,0 +1,5349 @@ + Documentation/Configure.help | 66 ++ + arch/alpha/defconfig | 7 + arch/alpha/kernel/entry.S | 12 + arch/arm/defconfig | 7 + arch/arm/kernel/calls.S | 24 + arch/i386/defconfig | 7 + arch/ia64/defconfig | 7 + arch/ia64/kernel/entry.S | 24 + arch/m68k/defconfig | 7 + arch/mips/defconfig | 7 + arch/mips64/defconfig | 7 + arch/ppc/defconfig | 14 + arch/ppc64/kernel/misc.S | 2 + arch/s390/defconfig | 7 + arch/s390/kernel/entry.S | 24 + arch/s390x/defconfig | 7 + arch/s390x/kernel/entry.S | 24 + arch/s390x/kernel/wrapper32.S | 92 +++ + arch/sparc/defconfig | 7 + arch/sparc/kernel/systbls.S | 10 + arch/sparc64/defconfig | 7 + arch/sparc64/kernel/systbls.S | 20 + fs/Config.in | 14 + fs/Makefile | 3 + fs/ext2/Makefile | 4 + fs/ext2/file.c | 5 + fs/ext2/ialloc.c | 2 + fs/ext2/inode.c | 34 - + fs/ext2/namei.c | 14 + fs/ext2/super.c | 29 + fs/ext2/symlink.c | 14 + fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ + fs/ext2/xattr_user.c | 103 +++ + fs/ext3/Makefile | 10 + fs/ext3/file.c | 5 + fs/ext3/ialloc.c | 2 + fs/ext3/inode.c | 35 - + fs/ext3/namei.c | 21 + fs/ext3/super.c | 36 + + fs/ext3/symlink.c | 14 + fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ + 
fs/ext3/xattr_user.c | 111 +++ + fs/jfs/jfs_xattr.h | 6 + fs/jfs/xattr.c | 6 + fs/mbcache.c | 648 ++++++++++++++++++++++ + include/asm-arm/unistd.h | 2 + include/asm-ia64/unistd.h | 13 + include/asm-ppc64/unistd.h | 2 + include/asm-s390/unistd.h | 15 + include/asm-s390x/unistd.h | 15 + include/asm-sparc/unistd.h | 24 + include/asm-sparc64/unistd.h | 24 + include/linux/cache_def.h | 15 + include/linux/errno.h | 4 + include/linux/ext2_fs.h | 31 - + include/linux/ext2_xattr.h | 157 +++++ + include/linux/ext3_fs.h | 31 - + include/linux/ext3_jbd.h | 8 + include/linux/ext3_xattr.h | 157 +++++ + include/linux/fs.h | 2 + include/linux/mbcache.h | 69 ++ + kernel/ksyms.c | 4 + mm/vmscan.c | 35 + + fs/ext3/ext3-exports.c | 14 + + 64 files changed, 4355 insertions(+), 195 deletions(-) + +--- linux-2.4.20/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54 2003-05-05 17:43:06.000000000 +0800 ++++ linux-2.4.20-root/Documentation/Configure.help 2003-05-07 18:08:03.000000000 +0800 +@@ -15242,6 +15242,39 @@ CONFIG_EXT2_FS + be compiled as a module, and so this could be dangerous. Most + everyone wants to say Y here. + ++Ext2 extended attributes ++CONFIG_EXT2_FS_XATTR ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ for details). ++ ++ If unsure, say N. ++ ++Ext2 extended attribute block sharing ++CONFIG_EXT2_FS_XATTR_SHARING ++ This options enables code for sharing identical extended attribute ++ blocks among multiple inodes. ++ ++ Usually, say Y. ++ ++Ext2 extended user attributes ++CONFIG_EXT2_FS_XATTR_USER ++ This option enables extended user attributes on ext2. Processes can ++ associate extended user attributes with inodes to store additional ++ information such as the character encoding of files, etc. (see the ++ attr(5) manual page, or visit for details). ++ ++ If unsure, say N. 
++ ++Ext2 trusted extended attributes ++CONFIG_EXT2_FS_XATTR_TRUSTED ++ This option enables extended attributes on ext2 that are accessible ++ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this ++ is only the super user. Trusted extended attributes are meant for ++ implementing system/security services. ++ ++ If unsure, say N. ++ + Ext3 journalling file system support (EXPERIMENTAL) + CONFIG_EXT3_FS + This is the journalling version of the Second extended file system +@@ -15274,6 +15307,39 @@ CONFIG_EXT3_FS + of your root partition (the one containing the directory /) cannot + be compiled as a module, and so this may be dangerous. + ++Ext3 extended attributes ++CONFIG_EXT3_FS_XATTR ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ for details). ++ ++ If unsure, say N. ++ ++Ext3 extended attribute block sharing ++CONFIG_EXT3_FS_XATTR_SHARING ++ This options enables code for sharing identical extended attribute ++ blocks among multiple inodes. ++ ++ Usually, say Y. ++ ++Ext3 extended user attributes ++CONFIG_EXT3_FS_XATTR_USER ++ This option enables extended user attributes on ext3. Processes can ++ associate extended user attributes with inodes to store additional ++ information such as the character encoding of files, etc. (see the ++ attr(5) manual page, or visit for details). ++ ++ If unsure, say N. ++ ++Ext3 trusted extended attributes ++CONFIG_EXT3_FS_XATTR_TRUSTED ++ This option enables extended attributes on ext3 that are accessible ++ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this ++ is only the super user. Trusted extended attributes are meant for ++ implementing system/security services. ++ ++ If unsure, say N. ++ + Journal Block Device support (JBD for ext3) (EXPERIMENTAL) + CONFIG_JBD + This is a generic journalling layer for block devices. 
It is +--- linux-2.4.20/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54 2001-11-20 07:19:42.000000000 +0800 ++++ linux-2.4.20-root/arch/alpha/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + CONFIG_ALPHA=y + # CONFIG_UID16 is not set + # CONFIG_RWSEM_GENERIC_SPINLOCK is not set +--- linux-2.4.20/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:42.000000000 +0800 ++++ linux-2.4.20-root/arch/alpha/kernel/entry.S 2003-05-07 18:08:03.000000000 +0800 +@@ -1154,6 +1154,18 @@ sys_call_table: + .quad sys_readahead + .quad sys_ni_syscall /* 380, sys_security */ + .quad sys_tkill ++ .quad sys_setxattr ++ .quad sys_lsetxattr ++ .quad sys_fsetxattr ++ .quad sys_getxattr /* 385 */ ++ .quad sys_lgetxattr ++ .quad sys_fgetxattr ++ .quad sys_listxattr ++ .quad sys_llistxattr ++ .quad sys_flistxattr /* 390 */ ++ .quad sys_removexattr ++ .quad sys_lremovexattr ++ .quad sys_fremovexattr + + /* Remember to update everything, kids. */ + .ifne (. 
- sys_call_table) - (NR_SYSCALLS * 8) +--- linux-2.4.20/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54 2001-05-20 08:43:05.000000000 +0800 ++++ linux-2.4.20-root/arch/arm/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + CONFIG_ARM=y + # CONFIG_EISA is not set + # CONFIG_SBUS is not set +--- linux-2.4.20/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:42.000000000 +0800 ++++ linux-2.4.20-root/arch/arm/kernel/calls.S 2003-05-07 18:08:03.000000000 +0800 +@@ -240,18 +240,18 @@ __syscall_start: + .long SYMBOL_NAME(sys_ni_syscall) /* Security */ + .long SYMBOL_NAME(sys_gettid) + /* 225 */ .long SYMBOL_NAME(sys_readahead) +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_setxattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lsetxattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fsetxattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_getxattr */ +-/* 230 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_lgetxattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fgetxattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_listxattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_llistxattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_flistxattr */ +-/* 235 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_removexattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lremovexattr */ +- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fremovexattr */ ++ .long SYMBOL_NAME(sys_setxattr) ++ .long SYMBOL_NAME(sys_lsetxattr) ++ .long SYMBOL_NAME(sys_fsetxattr) ++ .long SYMBOL_NAME(sys_getxattr) ++/* 230 */ .long SYMBOL_NAME(sys_lgetxattr) ++ .long SYMBOL_NAME(sys_fgetxattr) ++ .long SYMBOL_NAME(sys_listxattr) ++ .long SYMBOL_NAME(sys_llistxattr) 
++ .long SYMBOL_NAME(sys_flistxattr) ++/* 235 */ .long SYMBOL_NAME(sys_removexattr) ++ .long SYMBOL_NAME(sys_lremovexattr) ++ .long SYMBOL_NAME(sys_fremovexattr) + .long SYMBOL_NAME(sys_tkill) + /* + * Please check 2.5 _before_ adding calls here, +--- linux-2.4.20/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:09.000000000 +0800 ++++ linux-2.4.20-root/arch/i386/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + CONFIG_X86=y + CONFIG_ISA=y + # CONFIG_SBUS is not set +--- linux-2.4.20/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:09.000000000 +0800 ++++ linux-2.4.20-root/arch/ia64/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + + # + # Code maturity level options +--- linux-2.4.20/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54 2000-06-20 03:56:08.000000000 +0800 ++++ linux-2.4.20-root/arch/m68k/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + CONFIG_UID16=y + + # +--- 
linux-2.4.20/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:10.000000000 +0800 ++++ linux-2.4.20-root/arch/mips/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + CONFIG_MIPS=y + CONFIG_MIPS32=y + # CONFIG_MIPS64 is not set +--- linux-2.4.20/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:10.000000000 +0800 ++++ linux-2.4.20-root/arch/mips64/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + CONFIG_MIPS=y + # CONFIG_MIPS32 is not set + CONFIG_MIPS64=y +--- linux-2.4.20/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 ++++ linux-2.4.20-root/arch/s390/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + # CONFIG_ISA is not set + # CONFIG_EISA is not set + # CONFIG_MCA is not set +--- linux-2.4.20/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 ++++ linux-2.4.20-root/arch/s390/kernel/entry.S 2003-05-07 18:08:03.000000000 +0800 +@@ 
-558,18 +558,18 @@ sys_call_table: + .long sys_fcntl64 + .long sys_ni_syscall + .long sys_ni_syscall +- .long sys_ni_syscall /* 224 - reserved for setxattr */ +- .long sys_ni_syscall /* 225 - reserved for lsetxattr */ +- .long sys_ni_syscall /* 226 - reserved for fsetxattr */ +- .long sys_ni_syscall /* 227 - reserved for getxattr */ +- .long sys_ni_syscall /* 228 - reserved for lgetxattr */ +- .long sys_ni_syscall /* 229 - reserved for fgetxattr */ +- .long sys_ni_syscall /* 230 - reserved for listxattr */ +- .long sys_ni_syscall /* 231 - reserved for llistxattr */ +- .long sys_ni_syscall /* 232 - reserved for flistxattr */ +- .long sys_ni_syscall /* 233 - reserved for removexattr */ +- .long sys_ni_syscall /* 234 - reserved for lremovexattr */ +- .long sys_ni_syscall /* 235 - reserved for fremovexattr */ ++ .long sys_setxattr ++ .long sys_lsetxattr /* 225 */ ++ .long sys_fsetxattr ++ .long sys_getxattr ++ .long sys_lgetxattr ++ .long sys_fgetxattr ++ .long sys_listxattr /* 230 */ ++ .long sys_llistxattr ++ .long sys_flistxattr ++ .long sys_removexattr ++ .long sys_lremovexattr ++ .long sys_fremovexattr /* 235 */ + .long sys_gettid + .long sys_tkill + .rept 255-237 +--- linux-2.4.20/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 ++++ linux-2.4.20-root/arch/s390x/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + # CONFIG_ISA is not set + # CONFIG_EISA is not set + # CONFIG_MCA is not set +--- linux-2.4.20/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 ++++ linux-2.4.20-root/arch/s390x/kernel/entry.S 2003-05-07 18:08:03.000000000 +0800 +@@ -591,18 
+591,18 @@ sys_call_table: + .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) + .long SYSCALL(sys_ni_syscall,sys_ni_syscall) + .long SYSCALL(sys_ni_syscall,sys_ni_syscall) +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 224 - reserved for setxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 225 - reserved for lsetxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 226 - reserved for fsetxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 227 - reserved for getxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 228 - reserved for lgetxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 229 - reserved for fgetxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 230 - reserved for listxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 231 - reserved for llistxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 232 - reserved for flistxattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 233 - reserved for removexattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 234 - reserved for lremovexattr */ +- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 235 - reserved for fremovexattr */ ++ .long SYSCALL(sys_setxattr,sys32_setxattr_wrapper) ++ .long SYSCALL(sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ ++ .long SYSCALL(sys_fsetxattr,sys32_fsetxattr_wrapper) ++ .long SYSCALL(sys_getxattr,sys32_getxattr_wrapper) ++ .long SYSCALL(sys_lgetxattr,sys32_lgetxattr_wrapper) ++ .long SYSCALL(sys_fgetxattr,sys32_fgetxattr_wrapper) ++ .long SYSCALL(sys_listxattr,sys32_listxattr_wrapper) /* 230 */ ++ .long SYSCALL(sys_llistxattr,sys32_llistxattr_wrapper) ++ .long SYSCALL(sys_flistxattr,sys32_flistxattr_wrapper) ++ .long SYSCALL(sys_removexattr,sys32_removexattr_wrapper) ++ .long SYSCALL(sys_lremovexattr,sys32_lremovexattr_wrapper) ++ .long SYSCALL(sys_fremovexattr,sys32_fremovexattr_wrapper)/* 235 */ + .long SYSCALL(sys_gettid,sys_gettid) + .long SYSCALL(sys_tkill,sys_tkill) + .rept 255-237 +--- 
linux-2.4.20/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54 2002-02-26 03:37:56.000000000 +0800 ++++ linux-2.4.20-root/arch/s390x/kernel/wrapper32.S 2003-05-07 18:08:03.000000000 +0800 +@@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper: + llgtr %r3,%r3 # struct stat64 * + llgfr %r4,%r4 # long + jg sys32_fstat64 # branch to system call ++ ++ .globl sys32_setxattr_wrapper ++sys32_setxattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ llgtr %r4,%r4 # void * ++ llgfr %r5,%r5 # size_t ++ lgfr %r6,%r6 # int ++ jg sys_setxattr ++ ++ .globl sys32_lsetxattr_wrapper ++sys32_lsetxattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ llgtr %r4,%r4 # void * ++ llgfr %r5,%r5 # size_t ++ lgfr %r6,%r6 # int ++ jg sys_lsetxattr ++ ++ .globl sys32_fsetxattr_wrapper ++sys32_fsetxattr_wrapper: ++ lgfr %r2,%r2 # int ++ llgtr %r3,%r3 # char * ++ llgtr %r4,%r4 # void * ++ llgfr %r5,%r5 # size_t ++ lgfr %r6,%r6 # int ++ jg sys_fsetxattr ++ ++ .globl sys32_getxattr_wrapper ++sys32_getxattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ llgtr %r4,%r4 # void * ++ llgfr %r5,%r5 # size_t ++ jg sys_getxattr ++ ++ .globl sys32_lgetxattr_wrapper ++sys32_lgetxattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ llgtr %r4,%r4 # void * ++ llgfr %r5,%r5 # size_t ++ jg sys_lgetxattr ++ ++ .globl sys32_fgetxattr_wrapper ++sys32_fgetxattr_wrapper: ++ lgfr %r2,%r2 # int ++ llgtr %r3,%r3 # char * ++ llgtr %r4,%r4 # void * ++ llgfr %r5,%r5 # size_t ++ jg sys_fgetxattr ++ ++ .globl sys32_listxattr_wrapper ++sys32_listxattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ llgfr %r4,%r4 # size_t ++ jg sys_listxattr ++ ++ .globl sys32_llistxattr_wrapper ++sys32_llistxattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ llgfr %r4,%r4 # size_t ++ jg sys_llistxattr ++ ++ .globl sys32_flistxattr_wrapper ++sys32_flistxattr_wrapper: ++ lgfr %r2,%r2 # int ++ llgtr %r3,%r3 # char * ++ llgfr %r4,%r4 # size_t ++ jg 
sys_flistxattr ++ ++ .globl sys32_removexattr_wrapper ++sys32_removexattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ jg sys_removexattr ++ ++ .globl sys32_lremovexattr_wrapper ++sys32_lremovexattr_wrapper: ++ llgtr %r2,%r2 # char * ++ llgtr %r3,%r3 # char * ++ jg sys_lremovexattr ++ ++ .globl sys32_fremovexattr_wrapper ++sys32_fremovexattr_wrapper: ++ lgfr %r2,%r2 # int ++ llgtr %r3,%r3 # char * ++ jg sys_fremovexattr ++ ++ +--- linux-2.4.20/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:12.000000000 +0800 ++++ linux-2.4.20-root/arch/sparc64/defconfig 2003-05-07 18:08:03.000000000 +0800 +@@ -1,6 +1,13 @@ + # + # Automatically generated make config: don't edit + # ++# CONFIG_EXT3_FS_XATTR is not set ++# CONFIG_EXT3_FS_XATTR_SHARING is not set ++# CONFIG_EXT3_FS_XATTR_USER is not set ++# CONFIG_EXT2_FS_XATTR is not set ++# CONFIG_EXT2_FS_XATTR_SHARING is not set ++# CONFIG_EXT2_FS_XATTR_USER is not set ++# CONFIG_FS_MBCACHE is not set + + # + # Code maturity level options +--- linux-2.4.20/fs/Config.in~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/Config.in 2003-05-07 18:08:03.000000000 +0800 +@@ -25,6 +25,11 @@ dep_mbool ' Debug Befs' CONFIG_BEFS_DEB + dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL + + tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS ++dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS ++dep_bool ' Ext3 extended attribute block sharing' \ ++ CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR ++dep_bool ' Ext3 extended user attributes' \ ++ CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR + # CONFIG_JBD could be its own option (even modular), but until there are + # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS + # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS +@@ -84,6 +89,11 @@ dep_mbool ' QNX4FS write support (DANGE + 
tristate 'ROM file system support' CONFIG_ROMFS_FS + + tristate 'Second extended fs support' CONFIG_EXT2_FS ++dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS ++dep_bool ' Ext2 extended attribute block sharing' \ ++ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR ++dep_bool ' Ext2 extended user attributes' \ ++ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR + + tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS + +@@ -155,6 +165,10 @@ else + define_tristate CONFIG_ZISOFS_FS n + fi + ++# Meta block cache for Extended Attributes (ext2/ext3) ++#tristate 'Meta block cache' CONFIG_FS_MBCACHE ++define_tristate CONFIG_FS_MBCACHE y ++ + mainmenu_option next_comment + comment 'Partition Types' + source fs/partitions/Config.in +--- linux-2.4.20/fs/Makefile~linux-2.4.20-xattr-0.8.54 2003-05-05 19:00:58.000000000 +0800 ++++ linux-2.4.20-root/fs/Makefile 2003-05-07 18:08:03.000000000 +0800 +@@ -79,6 +79,9 @@ obj-y += binfmt_script.o + + obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o + ++export-objs += mbcache.o ++obj-$(CONFIG_FS_MBCACHE) += mbcache.o ++ + # persistent filesystems + obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) + +--- linux-2.4.20/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54 2001-10-11 23:05:18.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/Makefile 2003-05-07 18:08:03.000000000 +0800 +@@ -13,4 +13,8 @@ obj-y := balloc.o bitmap.o dir.o file + ioctl.o namei.o super.o symlink.o + obj-m := $(O_TARGET) + ++export-objs += xattr.o ++obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o ++obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o ++ + include $(TOPDIR)/Rules.make +--- linux-2.4.20/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54 2001-10-11 23:05:18.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/file.c 2003-05-07 18:08:03.000000000 +0800 +@@ -20,6 +20,7 @@ + + #include + #include ++#include + #include + + /* +@@ -51,4 +52,8 @@ struct file_operations ext2_file_operati + + struct inode_operations ext2_file_inode_operations = { + truncate: 
ext2_truncate, ++ setxattr: ext2_setxattr, ++ getxattr: ext2_getxattr, ++ listxattr: ext2_listxattr, ++ removexattr: ext2_removexattr, + }; +--- linux-2.4.20/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/ialloc.c 2003-05-07 18:08:03.000000000 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -167,6 +168,7 @@ void ext2_free_inode (struct inode * ino + */ + if (!is_bad_inode(inode)) { + /* Quota is already initialized in iput() */ ++ ext2_xattr_delete_inode(inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + } +--- linux-2.4.20/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/inode.c 2003-05-07 18:08:03.000000000 +0800 +@@ -39,6 +39,18 @@ MODULE_LICENSE("GPL"); + static int ext2_update_inode(struct inode * inode, int do_sync); + + /* ++ * Test whether an inode is a fast symlink. ++ */ ++static inline int ext2_inode_is_fast_symlink(struct inode *inode) ++{ ++ int ea_blocks = inode->u.ext2_i.i_file_acl ? 
++ (inode->i_sb->s_blocksize >> 9) : 0; ++ ++ return (S_ISLNK(inode->i_mode) && ++ inode->i_blocks - ea_blocks == 0); ++} ++ ++/* + * Called at each iput() + */ + void ext2_put_inode (struct inode * inode) +@@ -53,9 +65,7 @@ void ext2_delete_inode (struct inode * i + { + lock_kernel(); + +- if (is_bad_inode(inode) || +- inode->i_ino == EXT2_ACL_IDX_INO || +- inode->i_ino == EXT2_ACL_DATA_INO) ++ if (is_bad_inode(inode)) + goto no_delete; + inode->u.ext2_i.i_dtime = CURRENT_TIME; + mark_inode_dirty(inode); +@@ -801,6 +811,8 @@ void ext2_truncate (struct inode * inode + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; ++ if (ext2_inode_is_fast_symlink(inode)) ++ return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + +@@ -888,8 +900,7 @@ void ext2_read_inode (struct inode * ino + unsigned long offset; + struct ext2_group_desc * gdp; + +- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && +- inode->i_ino != EXT2_ACL_DATA_INO && ++ if ((inode->i_ino != EXT2_ROOT_INO && + inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || + inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { + ext2_error (inode->i_sb, "ext2_read_inode", +@@ -974,10 +985,7 @@ void ext2_read_inode (struct inode * ino + for (block = 0; block < EXT2_N_BLOCKS; block++) + inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; + +- if (inode->i_ino == EXT2_ACL_IDX_INO || +- inode->i_ino == EXT2_ACL_DATA_INO) +- /* Nothing to do */ ; +- else if (S_ISREG(inode->i_mode)) { ++ if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext2_file_inode_operations; + inode->i_fop = &ext2_file_operations; + inode->i_mapping->a_ops = &ext2_aops; +@@ -986,15 +994,17 @@ void ext2_read_inode (struct inode * ino + inode->i_fop = &ext2_dir_operations; + inode->i_mapping->a_ops = &ext2_aops; + } else if (S_ISLNK(inode->i_mode)) { +- if (!inode->i_blocks) ++ if (ext2_inode_is_fast_symlink(inode)) + inode->i_op = 
&ext2_fast_symlink_inode_operations; + else { +- inode->i_op = &page_symlink_inode_operations; ++ inode->i_op = &ext2_symlink_inode_operations; + inode->i_mapping->a_ops = &ext2_aops; + } +- } else ++ } else { ++ inode->i_op = &ext2_special_inode_operations; + init_special_inode(inode, inode->i_mode, + le32_to_cpu(raw_inode->i_block[0])); ++ } + brelse (bh); + inode->i_attr_flags = 0; + if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) { +--- linux-2.4.20/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54 2001-10-04 13:57:36.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/namei.c 2003-05-07 18:08:03.000000000 +0800 +@@ -31,6 +31,7 @@ + + #include + #include ++#include + #include + + /* +@@ -136,7 +137,7 @@ static int ext2_symlink (struct inode * + + if (l > sizeof (inode->u.ext2_i.i_data)) { + /* slow symlink */ +- inode->i_op = &page_symlink_inode_operations; ++ inode->i_op = &ext2_symlink_inode_operations; + inode->i_mapping->a_ops = &ext2_aops; + err = block_symlink(inode, symname, l); + if (err) +@@ -345,4 +346,15 @@ struct inode_operations ext2_dir_inode_o + rmdir: ext2_rmdir, + mknod: ext2_mknod, + rename: ext2_rename, ++ setxattr: ext2_setxattr, ++ getxattr: ext2_getxattr, ++ listxattr: ext2_listxattr, ++ removexattr: ext2_removexattr, ++}; ++ ++struct inode_operations ext2_special_inode_operations = { ++ setxattr: ext2_setxattr, ++ getxattr: ext2_getxattr, ++ listxattr: ext2_listxattr, ++ removexattr: ext2_removexattr, + }; +--- linux-2.4.20/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/super.c 2003-05-07 18:08:03.000000000 +0800 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -125,6 +126,7 @@ void ext2_put_super (struct super_block + int db_count; + int i; + ++ ext2_xattr_put_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { + struct ext2_super_block *es = EXT2_SB(sb)->s_es; + +@@ -175,6 +177,13 @@ static int parse_options (char * options + this_char = strtok 
(NULL, ",")) { + if ((value = strchr (this_char, '=')) != NULL) + *value++ = 0; ++#ifdef CONFIG_EXT2_FS_XATTR_USER ++ if (!strcmp (this_char, "user_xattr")) ++ set_opt (*mount_options, XATTR_USER); ++ else if (!strcmp (this_char, "nouser_xattr")) ++ clear_opt (*mount_options, XATTR_USER); ++ else ++#endif + if (!strcmp (this_char, "bsddf")) + clear_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "nouid32")) { +@@ -424,6 +433,9 @@ struct super_block * ext2_read_super (st + blocksize = BLOCK_SIZE; + + sb->u.ext2_sb.s_mount_opt = 0; ++#ifdef CONFIG_EXT2_FS_XATTR_USER ++ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ ++#endif + if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, + &sb->u.ext2_sb.s_mount_opt)) { + return NULL; +@@ -813,12 +825,27 @@ static DECLARE_FSTYPE_DEV(ext2_fs_type, + + static int __init init_ext2_fs(void) + { +- return register_filesystem(&ext2_fs_type); ++ int error = init_ext2_xattr(); ++ if (error) ++ return error; ++ error = init_ext2_xattr_user(); ++ if (error) ++ goto fail; ++ error = register_filesystem(&ext2_fs_type); ++ if (!error) ++ return 0; ++ ++ exit_ext2_xattr_user(); ++fail: ++ exit_ext2_xattr(); ++ return error; + } + + static void __exit exit_ext2_fs(void) + { + unregister_filesystem(&ext2_fs_type); ++ exit_ext2_xattr_user(); ++ exit_ext2_xattr(); + } + + EXPORT_NO_SYMBOLS; +--- linux-2.4.20/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54 2000-09-28 04:41:33.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/symlink.c 2003-05-07 18:08:03.000000000 +0800 +@@ -19,6 +19,7 @@ + + #include + #include ++#include + + static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) + { +@@ -32,7 +33,20 @@ static int ext2_follow_link(struct dentr + return vfs_follow_link(nd, s); + } + ++struct inode_operations ext2_symlink_inode_operations = { ++ readlink: page_readlink, ++ follow_link: page_follow_link, ++ setxattr: ext2_setxattr, ++ getxattr: ext2_getxattr, ++ listxattr: ext2_listxattr, ++ 
removexattr: ext2_removexattr, ++}; ++ + struct inode_operations ext2_fast_symlink_inode_operations = { + readlink: ext2_readlink, + follow_link: ext2_follow_link, ++ setxattr: ext2_setxattr, ++ getxattr: ext2_getxattr, ++ listxattr: ext2_listxattr, ++ removexattr: ext2_removexattr, + }; +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/xattr.c 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,1212 @@ ++/* ++ * linux/fs/ext2/xattr.c ++ * ++ * Copyright (C) 2001 by Andreas Gruenbacher, ++ * ++ * Fix by Harrison Xing . ++ * Extended attributes for symlinks and special files added per ++ * suggestion of Luka Renko . ++ */ ++ ++/* ++ * Extended attributes are stored on disk blocks allocated outside of ++ * any inode. The i_file_acl field is then made to point to this allocated ++ * block. If all extended attributes of an inode are identical, these ++ * inodes may share the same extended attribute block. Such situations ++ * are automatically detected by keeping a cache of recent attribute block ++ * numbers and hashes over the block's contents in memory. ++ * ++ * ++ * Extended attribute block layout: ++ * ++ * +------------------+ ++ * | header | ++ * | entry 1 | | ++ * | entry 2 | | growing downwards ++ * | entry 3 | v ++ * | four null bytes | ++ * | . . . | ++ * | value 1 | ^ ++ * | value 3 | | growing upwards ++ * | value 2 | | ++ * +------------------+ ++ * ++ * The block header is followed by multiple entry descriptors. These entry ++ * descriptors are variable in size, and aligned to EXT2_XATTR_PAD ++ * byte boundaries. The entry descriptors are sorted by attribute name, ++ * so that two extended attribute blocks can be compared efficiently. ++ * ++ * Attribute values are aligned to the end of the block, stored in ++ * no specific order. They are also padded to EXT2_XATTR_PAD byte ++ * boundaries. No additional gaps are left between them.
++ * ++ * Locking strategy ++ * ---------------- ++ * The VFS already holds the BKL and the inode->i_sem semaphore when any of ++ * the xattr inode operations are called, so we are guaranteed that only one ++ * process accesses extended attributes of an inode at any time. ++ * ++ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that ++ * only a single process is modifying an extended attribute block, even ++ * if the block is shared among inodes. ++ * ++ * Note for porting to 2.5 ++ * ----------------------- ++ * The BKL will no longer be held in the xattr inode operations. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* These symbols may be needed by a module. */ ++EXPORT_SYMBOL(ext2_xattr_register); ++EXPORT_SYMBOL(ext2_xattr_unregister); ++EXPORT_SYMBOL(ext2_xattr_get); ++EXPORT_SYMBOL(ext2_xattr_list); ++EXPORT_SYMBOL(ext2_xattr_set); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) ++# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) ++#endif ++ ++#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) ++#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) ++#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) ++#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) ++ ++#ifdef EXT2_XATTR_DEBUG ++# define ea_idebug(inode, f...) do { \ ++ printk(KERN_DEBUG "inode %s:%ld: ", \ ++ kdevname(inode->i_dev), inode->i_ino); \ ++ printk(f); \ ++ printk("\n"); \ ++ } while (0) ++# define ea_bdebug(bh, f...) do { \ ++ printk(KERN_DEBUG "block %s:%ld: ", \ ++ kdevname(bh->b_dev), bh->b_blocknr); \ ++ printk(f); \ ++ printk("\n"); \ ++ } while (0) ++#else ++# define ea_idebug(f...) ++# define ea_bdebug(f...)
++#endif ++ ++static int ext2_xattr_set2(struct inode *, struct buffer_head *, ++ struct ext2_xattr_header *); ++ ++#ifdef CONFIG_EXT2_FS_XATTR_SHARING ++ ++static int ext2_xattr_cache_insert(struct buffer_head *); ++static struct buffer_head *ext2_xattr_cache_find(struct inode *, ++ struct ext2_xattr_header *); ++static void ext2_xattr_cache_remove(struct buffer_head *); ++static void ext2_xattr_rehash(struct ext2_xattr_header *, ++ struct ext2_xattr_entry *); ++ ++static struct mb_cache *ext2_xattr_cache; ++ ++#else ++# define ext2_xattr_cache_insert(bh) 0 ++# define ext2_xattr_cache_find(inode, header) NULL ++# define ext2_xattr_cache_remove(bh) while(0) {} ++# define ext2_xattr_rehash(header, entry) while(0) {} ++#endif ++ ++/* ++ * If a file system does not share extended attributes among inodes, ++ * we should not need the ext2_xattr_sem semaphore. However, the ++ * filesystem may still contain shared blocks, so we always take ++ * the lock. ++ */ ++ ++DECLARE_MUTEX(ext2_xattr_sem); ++ ++static inline int ++ext2_xattr_new_block(struct inode *inode, int * errp, int force) ++{ ++ struct super_block *sb = inode->i_sb; ++ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + ++ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); ++ ++ /* How can we enforce the allocation? */ ++ int block = ext2_new_block(inode, goal, 0, 0, errp); ++#ifdef OLD_QUOTAS ++ if (!*errp) ++ inode->i_blocks += inode->i_sb->s_blocksize >> 9; ++#endif ++ return block; ++} ++ ++static inline int ++ext2_xattr_quota_alloc(struct inode *inode, int force) ++{ ++ /* How can we enforce the allocation? 
*/ ++#ifdef OLD_QUOTAS ++ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); ++ if (!error) ++ inode->i_blocks += inode->i_sb->s_blocksize >> 9; ++#else ++ int error = DQUOT_ALLOC_BLOCK(inode, 1); ++#endif ++ return error; ++} ++ ++#ifdef OLD_QUOTAS ++ ++static inline void ++ext2_xattr_quota_free(struct inode *inode) ++{ ++ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); ++ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; ++} ++ ++static inline void ++ext2_xattr_free_block(struct inode * inode, unsigned long block) ++{ ++ ext2_free_blocks(inode, block, 1); ++ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; ++} ++ ++#else ++# define ext2_xattr_quota_free(inode) \ ++ DQUOT_FREE_BLOCK(inode, 1) ++# define ext2_xattr_free_block(inode, block) \ ++ ext2_free_blocks(inode, block, 1) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ++ ++static inline struct buffer_head * ++sb_bread(struct super_block *sb, int block) ++{ ++ return bread(sb->s_dev, block, sb->s_blocksize); ++} ++ ++static inline struct buffer_head * ++sb_getblk(struct super_block *sb, int block) ++{ ++ return getblk(sb->s_dev, block, sb->s_blocksize); ++} ++ ++#endif ++ ++struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; ++rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; ++ ++int ++ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) ++{ ++ int error = -EINVAL; ++ ++ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { ++ write_lock(&ext2_handler_lock); ++ if (!ext2_xattr_handlers[name_index-1]) { ++ ext2_xattr_handlers[name_index-1] = handler; ++ error = 0; ++ } ++ write_unlock(&ext2_handler_lock); ++ } ++ return error; ++} ++ ++void ++ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) ++{ ++ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { ++ write_lock(&ext2_handler_lock); ++ ext2_xattr_handlers[name_index-1] = NULL; ++ write_unlock(&ext2_handler_lock); ++ } ++} ++ ++static inline const char * ++strcmp_prefix(const
char *a, const char *a_prefix) ++{ ++ while (*a_prefix && *a == *a_prefix) { ++ a++; ++ a_prefix++; ++ } ++ return *a_prefix ? NULL : a; ++} ++ ++/* ++ * Decode the extended attribute name, and translate it into ++ * the name_index and name suffix. ++ */ ++static struct ext2_xattr_handler * ++ext2_xattr_resolve_name(const char **name) ++{ ++ struct ext2_xattr_handler *handler = NULL; ++ int i; ++ ++ if (!*name) ++ return NULL; ++ read_lock(&ext2_handler_lock); ++ for (i=0; i<EXT2_XATTR_INDEX_MAX; i++) { ++ if (ext2_xattr_handlers[i]) { ++ const char *n = strcmp_prefix(*name, ++ ext2_xattr_handlers[i]->prefix); ++ if (n) { ++ handler = ext2_xattr_handlers[i]; ++ *name = n; ++ break; ++ } ++ } ++ } ++ read_unlock(&ext2_handler_lock); ++ return handler; ++} ++ ++static inline struct ext2_xattr_handler * ++ext2_xattr_handler(int name_index) ++{ ++ struct ext2_xattr_handler *handler = NULL; ++ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { ++ read_lock(&ext2_handler_lock); ++ handler = ext2_xattr_handlers[name_index-1]; ++ read_unlock(&ext2_handler_lock); ++ } ++ return handler; ++} ++ ++/* ++ * Inode operation getxattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++ssize_t ++ext2_getxattr(struct dentry *dentry, const char *name, ++ void *buffer, size_t size) ++{ ++ struct ext2_xattr_handler *handler; ++ struct inode *inode = dentry->d_inode; ++ ++ handler = ext2_xattr_resolve_name(&name); ++ if (!handler) ++ return -ENOTSUP; ++ return handler->get(inode, name, buffer, size); ++} ++ ++/* ++ * Inode operation listxattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++ssize_t ++ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) ++{ ++ return ext2_xattr_list(dentry->d_inode, buffer, size); ++} ++ ++/* ++ * Inode operation setxattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++int ++ext2_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ struct ext2_xattr_handler *handler; ++ struct inode *inode = dentry->d_inode; ++ ++ if (size == 0)
++ value = ""; /* empty EA, do not remove */ ++ handler = ext2_xattr_resolve_name(&name); ++ if (!handler) ++ return -ENOTSUP; ++ return handler->set(inode, name, value, size, flags); ++} ++ ++/* ++ * Inode operation removexattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++int ++ext2_removexattr(struct dentry *dentry, const char *name) ++{ ++ struct ext2_xattr_handler *handler; ++ struct inode *inode = dentry->d_inode; ++ ++ handler = ext2_xattr_resolve_name(&name); ++ if (!handler) ++ return -ENOTSUP; ++ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); ++} ++ ++/* ++ * ext2_xattr_get() ++ * ++ * Copy an extended attribute into the buffer ++ * provided, or compute the buffer size required. ++ * Buffer is NULL to compute the size of the buffer required. ++ * ++ * Returns a negative error number on failure, or the number of bytes ++ * used / required on success. ++ */ ++int ++ext2_xattr_get(struct inode *inode, int name_index, const char *name, ++ void *buffer, size_t buffer_size) ++{ ++ struct buffer_head *bh = NULL; ++ struct ext2_xattr_entry *entry; ++ unsigned int block, size; ++ char *end; ++ int name_len, error; ++ ++ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", ++ name_index, name, buffer, (long)buffer_size); ++ ++ if (name == NULL) ++ return -EINVAL; ++ if (!EXT2_I(inode)->i_file_acl) ++ return -ENOATTR; ++ block = EXT2_I(inode)->i_file_acl; ++ ea_idebug(inode, "reading block %d", block); ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) ++ return -EIO; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); ++ end = bh->b_data + bh->b_size; ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* find named attribute */ ++ name_len = strlen(name); ++ ++ 
error = -ERANGE; ++ if (name_len > 255) ++ goto cleanup; ++ entry = FIRST_ENTRY(bh); ++ while (!IS_LAST_ENTRY(entry)) { ++ struct ext2_xattr_entry *next = ++ EXT2_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (name_index == entry->e_name_index && ++ name_len == entry->e_name_len && ++ memcmp(name, entry->e_name, name_len) == 0) ++ goto found; ++ entry = next; ++ } ++ /* Check the remaining name entries */ ++ while (!IS_LAST_ENTRY(entry)) { ++ struct ext2_xattr_entry *next = ++ EXT2_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ entry = next; ++ } ++ if (ext2_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ error = -ENOATTR; ++ goto cleanup; ++found: ++ /* check the buffer size */ ++ if (entry->e_value_block != 0) ++ goto bad_block; ++ size = le32_to_cpu(entry->e_value_size); ++ if (size > inode->i_sb->s_blocksize || ++ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) ++ goto bad_block; ++ ++ if (ext2_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ if (buffer) { ++ error = -ERANGE; ++ if (size > buffer_size) ++ goto cleanup; ++ /* return value of attribute */ ++ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), ++ size); ++ } ++ error = size; ++ ++cleanup: ++ brelse(bh); ++ ++ return error; ++} ++ ++/* ++ * ext2_xattr_list() ++ * ++ * Copy a list of attribute names into the buffer ++ * provided, or compute the buffer size required. ++ * Buffer is NULL to compute the size of the buffer required. ++ * ++ * Returns a negative error number on failure, or the number of bytes ++ * used / required on success. 
++ */ ++int ++ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ++{ ++ struct buffer_head *bh = NULL; ++ struct ext2_xattr_entry *entry; ++ unsigned int block, size = 0; ++ char *buf, *end; ++ int error; ++ ++ ea_idebug(inode, "buffer=%p, buffer_size=%ld", ++ buffer, (long)buffer_size); ++ ++ if (!EXT2_I(inode)->i_file_acl) ++ return 0; ++ block = EXT2_I(inode)->i_file_acl; ++ ea_idebug(inode, "reading block %d", block); ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) ++ return -EIO; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); ++ end = bh->b_data + bh->b_size; ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* compute the size required for the list of attribute names */ ++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); ++ entry = EXT2_XATTR_NEXT(entry)) { ++ struct ext2_xattr_handler *handler; ++ struct ext2_xattr_entry *next = ++ EXT2_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ ++ handler = ext2_xattr_handler(entry->e_name_index); ++ if (handler) ++ size += handler->list(NULL, inode, entry->e_name, ++ entry->e_name_len); ++ } ++ ++ if (ext2_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ if (!buffer) { ++ error = size; ++ goto cleanup; ++ } else { ++ error = -ERANGE; ++ if (size > buffer_size) ++ goto cleanup; ++ } ++ ++ /* list the attribute names */ ++ buf = buffer; ++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); ++ entry = EXT2_XATTR_NEXT(entry)) { ++ struct ext2_xattr_handler *handler; ++ ++ handler = ext2_xattr_handler(entry->e_name_index); ++ if (handler) ++ buf += handler->list(buf, inode, entry->e_name, ++ entry->e_name_len); ++ } ++ error = size; ++ ++cleanup: ++ brelse(bh); ++ ++ return error; ++} ++ ++/* 
++ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is ++ * not set, set it. ++ */ ++static void ext2_xattr_update_super_block(struct super_block *sb) ++{ ++ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) ++ return; ++ ++ lock_super(sb); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) ++ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; ++#endif ++ EXT2_SB(sb)->s_es->s_feature_compat |= ++ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); ++ sb->s_dirt = 1; ++ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); ++ unlock_super(sb); ++} ++ ++/* ++ * ext2_xattr_set() ++ * ++ * Create, replace or remove an extended attribute for this inode. Buffer ++ * is NULL to remove an existing extended attribute, and non-NULL to ++ * either replace an existing extended attribute, or create a new extended ++ * attribute. The flags XATTR_REPLACE and XATTR_CREATE ++ * specify that an extended attribute must exist and must not exist ++ * previous to the call, respectively. ++ * ++ * Returns 0, or a negative error number on failure. ++ */ ++int ++ext2_xattr_set(struct inode *inode, int name_index, const char *name, ++ const void *value, size_t value_len, int flags) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *bh = NULL; ++ struct ext2_xattr_header *header = NULL; ++ struct ext2_xattr_entry *here, *last; ++ unsigned int name_len; ++ int block = EXT2_I(inode)->i_file_acl; ++ int min_offs = sb->s_blocksize, not_found = 1, free, error; ++ char *end; ++ ++ /* ++ * header -- Points either into bh, or to a temporarily ++ * allocated buffer. ++ * here -- The named entry found, or the place for inserting, within ++ * the block pointed to by header. ++ * last -- Points right after the last named entry within the block ++ * pointed to by header. ++ * min_offs -- The offset of the first value (values are aligned ++ * towards the end of the block). ++ * end -- Points right after the block pointed to by header. 
++ */ ++ ++ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", ++ name_index, name, value, (long)value_len); ++ ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) ++ return -EPERM; ++ if (value == NULL) ++ value_len = 0; ++ if (name == NULL) ++ return -EINVAL; ++ name_len = strlen(name); ++ if (name_len > 255 || value_len > sb->s_blocksize) ++ return -ERANGE; ++ down(&ext2_xattr_sem); ++ ++ if (block) { ++ /* The inode already has an extended attribute block. */ ++ ++ bh = sb_bread(sb, block); ++ error = -EIO; ++ if (!bh) ++ goto cleanup; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), ++ le32_to_cpu(HDR(bh)->h_refcount)); ++ header = HDR(bh); ++ end = bh->b_data + bh->b_size; ++ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || ++ header->h_blocks != cpu_to_le32(1)) { ++bad_block: ext2_error(sb, "ext2_xattr_set", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* Find the named attribute. */ ++ here = FIRST_ENTRY(bh); ++ while (!IS_LAST_ENTRY(here)) { ++ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (!here->e_value_block && here->e_value_size) { ++ int offs = le16_to_cpu(here->e_value_offs); ++ if (offs < min_offs) ++ min_offs = offs; ++ } ++ not_found = name_index - here->e_name_index; ++ if (!not_found) ++ not_found = name_len - here->e_name_len; ++ if (!not_found) ++ not_found = memcmp(name, here->e_name,name_len); ++ if (not_found <= 0) ++ break; ++ here = next; ++ } ++ last = here; ++ /* We still need to compute min_offs and last. 
*/ ++ while (!IS_LAST_ENTRY(last)) { ++ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (!last->e_value_block && last->e_value_size) { ++ int offs = le16_to_cpu(last->e_value_offs); ++ if (offs < min_offs) ++ min_offs = offs; ++ } ++ last = next; ++ } ++ ++ /* Check whether we have enough space left. */ ++ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); ++ } else { ++ /* We will use a new extended attribute block. */ ++ free = sb->s_blocksize - ++ sizeof(struct ext2_xattr_header) - sizeof(__u32); ++ here = last = NULL; /* avoid gcc uninitialized warning. */ ++ } ++ ++ if (not_found) { ++ /* Request to remove a nonexistent attribute? */ ++ error = -ENOATTR; ++ if (flags & XATTR_REPLACE) ++ goto cleanup; ++ error = 0; ++ if (value == NULL) ++ goto cleanup; ++ else ++ free -= EXT2_XATTR_LEN(name_len); ++ } else { ++ /* Request to create an existing attribute? */ ++ error = -EEXIST; ++ if (flags & XATTR_CREATE) ++ goto cleanup; ++ if (!here->e_value_block && here->e_value_size) { ++ unsigned int size = le32_to_cpu(here->e_value_size); ++ ++ if (le16_to_cpu(here->e_value_offs) + size > ++ sb->s_blocksize || size > sb->s_blocksize) ++ goto bad_block; ++ free += EXT2_XATTR_SIZE(size); ++ } ++ } ++ free -= EXT2_XATTR_SIZE(value_len); ++ error = -ENOSPC; ++ if (free < 0) ++ goto cleanup; ++ ++ /* Here we know that we can set the new attribute. 
*/ ++ ++ if (header) { ++ if (header->h_refcount == cpu_to_le32(1)) { ++ ea_bdebug(bh, "modifying in-place"); ++ ext2_xattr_cache_remove(bh); ++ } else { ++ int offset; ++ ++ ea_bdebug(bh, "cloning"); ++ header = kmalloc(bh->b_size, GFP_KERNEL); ++ error = -ENOMEM; ++ if (header == NULL) ++ goto cleanup; ++ memcpy(header, HDR(bh), bh->b_size); ++ header->h_refcount = cpu_to_le32(1); ++ offset = (char *)header - bh->b_data; ++ here = ENTRY((char *)here + offset); ++ last = ENTRY((char *)last + offset); ++ } ++ } else { ++ /* Allocate a buffer where we construct the new block. */ ++ header = kmalloc(sb->s_blocksize, GFP_KERNEL); ++ error = -ENOMEM; ++ if (header == NULL) ++ goto cleanup; ++ memset(header, 0, sb->s_blocksize); ++ end = (char *)header + sb->s_blocksize; ++ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); ++ header->h_blocks = header->h_refcount = cpu_to_le32(1); ++ last = here = ENTRY(header+1); ++ } ++ ++ if (not_found) { ++ /* Insert the new name. */ ++ int size = EXT2_XATTR_LEN(name_len); ++ int rest = (char *)last - (char *)here; ++ memmove((char *)here + size, here, rest); ++ memset(here, 0, size); ++ here->e_name_index = name_index; ++ here->e_name_len = name_len; ++ memcpy(here->e_name, name, name_len); ++ } else { ++ /* Remove the old value. */ ++ if (!here->e_value_block && here->e_value_size) { ++ char *first_val = (char *)header + min_offs; ++ int offs = le16_to_cpu(here->e_value_offs); ++ char *val = (char *)header + offs; ++ size_t size = EXT2_XATTR_SIZE( ++ le32_to_cpu(here->e_value_size)); ++ memmove(first_val + size, first_val, val - first_val); ++ memset(first_val, 0, size); ++ here->e_value_offs = 0; ++ min_offs += size; ++ ++ /* Adjust all value offsets. 
*/ ++ last = ENTRY(header+1); ++ while (!IS_LAST_ENTRY(last)) { ++ int o = le16_to_cpu(last->e_value_offs); ++ if (!last->e_value_block && o < offs) ++ last->e_value_offs = ++ cpu_to_le16(o + size); ++ last = EXT2_XATTR_NEXT(last); ++ } ++ } ++ if (value == NULL) { ++ /* Remove this attribute. */ ++ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { ++ /* This block is now empty. */ ++ error = ext2_xattr_set2(inode, bh, NULL); ++ goto cleanup; ++ } else { ++ /* Remove the old name. */ ++ int size = EXT2_XATTR_LEN(name_len); ++ last = ENTRY((char *)last - size); ++ memmove(here, (char*)here + size, ++ (char*)last - (char*)here); ++ memset(last, 0, size); ++ } ++ } ++ } ++ ++ if (value != NULL) { ++ /* Insert the new value. */ ++ here->e_value_size = cpu_to_le32(value_len); ++ if (value_len) { ++ size_t size = EXT2_XATTR_SIZE(value_len); ++ char *val = (char *)header + min_offs - size; ++ here->e_value_offs = ++ cpu_to_le16((char *)val - (char *)header); ++ memset(val + size - EXT2_XATTR_PAD, 0, ++ EXT2_XATTR_PAD); /* Clear the pad bytes. */ ++ memcpy(val, value, value_len); ++ } ++ } ++ ext2_xattr_rehash(header, here); ++ ++ error = ext2_xattr_set2(inode, bh, header); ++ ++cleanup: ++ brelse(bh); ++ if (!(bh && header == HDR(bh))) ++ kfree(header); ++ up(&ext2_xattr_sem); ++ ++ return error; ++} ++ ++/* ++ * Second half of ext2_xattr_set(): Update the file system. ++ */ ++static int ++ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, ++ struct ext2_xattr_header *header) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *new_bh = NULL; ++ int error; ++ ++ if (header) { ++ new_bh = ext2_xattr_cache_find(inode, header); ++ if (new_bh) { ++ /* ++ * We found an identical block in the cache. ++ * The old block will be released after updating ++ * the inode. 
++ */ ++ ea_bdebug(old_bh, "reusing block %ld", ++ new_bh->b_blocknr); ++ ++ error = -EDQUOT; ++ if (ext2_xattr_quota_alloc(inode, 1)) ++ goto cleanup; ++ ++ HDR(new_bh)->h_refcount = cpu_to_le32( ++ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ++ ea_bdebug(new_bh, "refcount now=%d", ++ le32_to_cpu(HDR(new_bh)->h_refcount)); ++ } else if (old_bh && header == HDR(old_bh)) { ++ /* Keep this block. */ ++ new_bh = old_bh; ++ ext2_xattr_cache_insert(new_bh); ++ } else { ++ /* We need to allocate a new block */ ++ int force = EXT2_I(inode)->i_file_acl != 0; ++ int block = ext2_xattr_new_block(inode, &error, force); ++ if (error) ++ goto cleanup; ++ ea_idebug(inode, "creating block %d", block); ++ ++ new_bh = sb_getblk(sb, block); ++ if (!new_bh) { ++ ext2_xattr_free_block(inode, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(new_bh); ++ memcpy(new_bh->b_data, header, new_bh->b_size); ++ mark_buffer_uptodate(new_bh, 1); ++ unlock_buffer(new_bh); ++ ext2_xattr_cache_insert(new_bh); ++ ++ ext2_xattr_update_super_block(sb); ++ } ++ mark_buffer_dirty(new_bh); ++ if (IS_SYNC(inode)) { ++ ll_rw_block(WRITE, 1, &new_bh); ++ wait_on_buffer(new_bh); ++ error = -EIO; ++ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) ++ goto cleanup; ++ } ++ } ++ ++ /* Update the inode. */ ++ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; ++ inode->i_ctime = CURRENT_TIME; ++ if (IS_SYNC(inode)) { ++ error = ext2_sync_inode (inode); ++ if (error) ++ goto cleanup; ++ } else ++ mark_inode_dirty(inode); ++ ++ error = 0; ++ if (old_bh && old_bh != new_bh) { ++ /* ++ * If there was an old block, and we are not still using it, ++ * we now release the old block. ++ */ ++ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); ++ ++ if (refcount == 1) { ++ /* Free the old block. */ ++ ea_bdebug(old_bh, "freeing"); ++ ext2_xattr_free_block(inode, old_bh->b_blocknr); ++ mark_buffer_clean(old_bh); ++ } else { ++ /* Decrement the refcount only. 
*/ ++ refcount--; ++ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); ++ ext2_xattr_quota_free(inode); ++ mark_buffer_dirty(old_bh); ++ ea_bdebug(old_bh, "refcount now=%d", refcount); ++ } ++ } ++ ++cleanup: ++ if (old_bh != new_bh) ++ brelse(new_bh); ++ ++ return error; ++} ++ ++/* ++ * ext2_xattr_delete_inode() ++ * ++ * Free extended attribute resources associated with this inode. This ++ * is called immediately before an inode is freed. ++ */ ++void ++ext2_xattr_delete_inode(struct inode *inode) ++{ ++ struct buffer_head *bh; ++ unsigned int block = EXT2_I(inode)->i_file_acl; ++ ++ if (!block) ++ return; ++ down(&ext2_xattr_sem); ++ ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) { ++ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", ++ "inode %ld: block %d read error", inode->i_ino, block); ++ goto cleanup; ++ } ++ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ goto cleanup; ++ } ++ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); ++ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ++ ext2_xattr_cache_remove(bh); ++ ext2_xattr_free_block(inode, block); ++ bforget(bh); ++ bh = NULL; ++ } else { ++ HDR(bh)->h_refcount = cpu_to_le32( ++ le32_to_cpu(HDR(bh)->h_refcount) - 1); ++ mark_buffer_dirty(bh); ++ if (IS_SYNC(inode)) { ++ ll_rw_block(WRITE, 1, &bh); ++ wait_on_buffer(bh); ++ } ++ ext2_xattr_quota_free(inode); ++ } ++ EXT2_I(inode)->i_file_acl = 0; ++ ++cleanup: ++ brelse(bh); ++ up(&ext2_xattr_sem); ++} ++ ++/* ++ * ext2_xattr_put_super() ++ * ++ * This is called when a file system is unmounted. 
++ */ ++void ++ext2_xattr_put_super(struct super_block *sb) ++{ ++#ifdef CONFIG_EXT2_FS_XATTR_SHARING ++ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); ++#endif ++} ++ ++#ifdef CONFIG_EXT2_FS_XATTR_SHARING ++ ++/* ++ * ext2_xattr_cache_insert() ++ * ++ * Create a new entry in the extended attribute cache, and insert ++ * it unless such an entry is already in the cache. ++ * ++ * Returns 0, or a negative error number on failure. ++ */ ++static int ++ext2_xattr_cache_insert(struct buffer_head *bh) ++{ ++ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); ++ struct mb_cache_entry *ce; ++ int error; ++ ++ ce = mb_cache_entry_alloc(ext2_xattr_cache); ++ if (!ce) ++ return -ENOMEM; ++ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); ++ if (error) { ++ mb_cache_entry_free(ce); ++ if (error == -EBUSY) { ++ ea_bdebug(bh, "already in cache (%d cache entries)", ++ atomic_read(&ext2_xattr_cache->c_entry_count)); ++ error = 0; ++ } ++ } else { ++ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, ++ atomic_read(&ext2_xattr_cache->c_entry_count)); ++ mb_cache_entry_release(ce); ++ } ++ return error; ++} ++ ++/* ++ * ext2_xattr_cmp() ++ * ++ * Compare two extended attribute blocks for equality. ++ * ++ * Returns 0 if the blocks are equal, 1 if they differ, and ++ * a negative error number on errors. 
++ */ ++static int ++ext2_xattr_cmp(struct ext2_xattr_header *header1, ++ struct ext2_xattr_header *header2) ++{ ++ struct ext2_xattr_entry *entry1, *entry2; ++ ++ entry1 = ENTRY(header1+1); ++ entry2 = ENTRY(header2+1); ++ while (!IS_LAST_ENTRY(entry1)) { ++ if (IS_LAST_ENTRY(entry2)) ++ return 1; ++ if (entry1->e_hash != entry2->e_hash || ++ entry1->e_name_len != entry2->e_name_len || ++ entry1->e_value_size != entry2->e_value_size || ++ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) ++ return 1; ++ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) ++ return -EIO; ++ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), ++ (char *)header2 + le16_to_cpu(entry2->e_value_offs), ++ le32_to_cpu(entry1->e_value_size))) ++ return 1; ++ ++ entry1 = EXT2_XATTR_NEXT(entry1); ++ entry2 = EXT2_XATTR_NEXT(entry2); ++ } ++ if (!IS_LAST_ENTRY(entry2)) ++ return 1; ++ return 0; ++} ++ ++/* ++ * ext2_xattr_cache_find() ++ * ++ * Find an identical extended attribute block. ++ * ++ * Returns a pointer to the block found, or NULL if such a block was ++ * not found or an error occurred. 
++ */ ++static struct buffer_head * ++ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) ++{ ++ __u32 hash = le32_to_cpu(header->h_hash); ++ struct mb_cache_entry *ce; ++ ++ if (!header->h_hash) ++ return NULL; /* never share */ ++ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); ++ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); ++ while (ce) { ++ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); ++ ++ if (!bh) { ++ ext2_error(inode->i_sb, "ext2_xattr_cache_find", ++ "inode %ld: block %ld read error", ++ inode->i_ino, ce->e_block); ++ } else if (le32_to_cpu(HDR(bh)->h_refcount) > ++ EXT2_XATTR_REFCOUNT_MAX) { ++ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, ++ le32_to_cpu(HDR(bh)->h_refcount), ++ EXT2_XATTR_REFCOUNT_MAX); ++ } else if (!ext2_xattr_cmp(header, HDR(bh))) { ++ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); ++ mb_cache_entry_release(ce); ++ return bh; ++ } ++ brelse(bh); ++ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); ++ } ++ return NULL; ++} ++ ++/* ++ * ext2_xattr_cache_remove() ++ * ++ * Remove the cache entry of a block from the cache. Called when a ++ * block becomes invalid. ++ */ ++static void ++ext2_xattr_cache_remove(struct buffer_head *bh) ++{ ++ struct mb_cache_entry *ce; ++ ++ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); ++ if (ce) { ++ ea_bdebug(bh, "removing (%d cache entries remaining)", ++ atomic_read(&ext2_xattr_cache->c_entry_count)-1); ++ mb_cache_entry_free(ce); ++ } else ++ ea_bdebug(bh, "no cache entry"); ++} ++ ++#define NAME_HASH_SHIFT 5 ++#define VALUE_HASH_SHIFT 16 ++ ++/* ++ * ext2_xattr_hash_entry() ++ * ++ * Compute the hash of an extended attribute. 
++ */ ++static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, ++ struct ext2_xattr_entry *entry) ++{ ++ __u32 hash = 0; ++ char *name = entry->e_name; ++ int n; ++ ++ for (n=0; n < entry->e_name_len; n++) { ++ hash = (hash << NAME_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ ++ *name++; ++ } ++ ++ if (entry->e_value_block == 0 && entry->e_value_size != 0) { ++ __u32 *value = (__u32 *)((char *)header + ++ le16_to_cpu(entry->e_value_offs)); ++ for (n = (le32_to_cpu(entry->e_value_size) + ++ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { ++ hash = (hash << VALUE_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ ++ le32_to_cpu(*value++); ++ } ++ } ++ entry->e_hash = cpu_to_le32(hash); ++} ++ ++#undef NAME_HASH_SHIFT ++#undef VALUE_HASH_SHIFT ++ ++#define BLOCK_HASH_SHIFT 16 ++ ++/* ++ * ext2_xattr_rehash() ++ * ++ * Re-compute the extended attribute hash value after an entry has changed. ++ */ ++static void ext2_xattr_rehash(struct ext2_xattr_header *header, ++ struct ext2_xattr_entry *entry) ++{ ++ struct ext2_xattr_entry *here; ++ __u32 hash = 0; ++ ++ ext2_xattr_hash_entry(header, entry); ++ here = ENTRY(header+1); ++ while (!IS_LAST_ENTRY(here)) { ++ if (!here->e_hash) { ++ /* Block is not shared if an entry's hash value == 0 */ ++ hash = 0; ++ break; ++ } ++ hash = (hash << BLOCK_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ ++ le32_to_cpu(here->e_hash); ++ here = EXT2_XATTR_NEXT(here); ++ } ++ header->h_hash = cpu_to_le32(hash); ++} ++ ++#undef BLOCK_HASH_SHIFT ++ ++int __init ++init_ext2_xattr(void) ++{ ++ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, ++ sizeof(struct mb_cache_entry) + ++ sizeof(struct mb_cache_entry_index), 1, 61); ++ if (!ext2_xattr_cache) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++void ++exit_ext2_xattr(void) ++{ ++ mb_cache_destroy(ext2_xattr_cache); ++} ++ ++#else /* CONFIG_EXT2_FS_XATTR_SHARING */ ++ ++int __init ++init_ext2_xattr(void) ++{ ++ return 
0; ++} ++ ++void ++exit_ext2_xattr(void) ++{ ++} ++ ++#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/fs/ext2/xattr_user.c 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,103 @@ ++/* ++ * linux/fs/ext2/xattr_user.c ++ * Handler for extended user attributes. ++ * ++ * Copyright (C) 2001 by Andreas Gruenbacher, ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_EXT2_FS_POSIX_ACL ++# include ++#endif ++ ++#define XATTR_USER_PREFIX "user." ++ ++static size_t ++ext2_xattr_user_list(char *list, struct inode *inode, ++ const char *name, int name_len) ++{ ++ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; ++ ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return 0; ++ ++ if (list) { ++ memcpy(list, XATTR_USER_PREFIX, prefix_len); ++ memcpy(list+prefix_len, name, name_len); ++ list[prefix_len + name_len] = '\0'; ++ } ++ return prefix_len + name_len + 1; ++} ++ ++static int ++ext2_xattr_user_get(struct inode *inode, const char *name, ++ void *buffer, size_t size) ++{ ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return -ENOTSUP; ++#ifdef CONFIG_EXT2_FS_POSIX_ACL ++ error = ext2_permission_locked(inode, MAY_READ); ++#else ++ error = permission(inode, MAY_READ); ++#endif ++ if (error) ++ return error; ++ ++ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, ++ buffer, size); ++} ++ ++static int ++ext2_xattr_user_set(struct inode *inode, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return -ENOTSUP; ++ if ( !S_ISREG(inode->i_mode) && ++ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) ++ return -EPERM; ++#ifdef CONFIG_EXT2_FS_POSIX_ACL ++ error = ext2_permission_locked(inode, MAY_WRITE); ++#else ++ error = permission(inode, MAY_WRITE); ++#endif ++ if (error) ++ return 
error; ++ ++ return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, ++ value, size, flags); ++} ++ ++struct ext2_xattr_handler ext2_xattr_user_handler = { ++ prefix: XATTR_USER_PREFIX, ++ list: ext2_xattr_user_list, ++ get: ext2_xattr_user_get, ++ set: ext2_xattr_user_set, ++}; ++ ++int __init ++init_ext2_xattr_user(void) ++{ ++ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, ++ &ext2_xattr_user_handler); ++} ++ ++void ++exit_ext2_xattr_user(void) ++{ ++ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, ++ &ext2_xattr_user_handler); ++} +--- linux-2.4.20/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/Makefile 2003-05-07 18:10:33.000000000 +0800 +@@ -1,5 +1,5 @@ + # +-# Makefile for the linux ext2-filesystem routines. ++# Makefile for the linux ext3-filesystem routines. + # + # Note! Dependencies are done automagically by 'make dep', which also + # removes any old dependencies. DON'T put your own dependencies here +@@ -9,10 +9,14 @@ + + O_TARGET := ext3.o + +-export-objs := super.o inode.o ++export-objs := ext3-exports.o + + obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o hash.o ++ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o + obj-m := $(O_TARGET) + ++export-objs += xattr.o ++obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o ++obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o ++ + include $(TOPDIR)/Rules.make +--- linux-2.4.20/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/file.c 2003-05-07 18:08:03.000000000 +0800 +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -126,5 +127,9 @@ struct file_operations ext3_file_operati + struct inode_operations ext3_file_inode_operations = { + truncate: ext3_truncate, /* BKL held */ + setattr: ext3_setattr, /* BKL held */ ++ setxattr: ext3_setxattr, /* BKL held */ ++ getxattr: ext3_getxattr, /* BKL held */ ++ 
listxattr: ext3_listxattr, /* BKL held */ ++ removexattr: ext3_removexattr, /* BKL held */ + }; + +--- linux-2.4.20/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/ialloc.c 2003-05-07 18:08:03.000000000 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, + * as writing the quota to disk may need the lock as well. + */ + DQUOT_INIT(inode); ++ ext3_xattr_delete_inode(handle, inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + +--- linux-2.4.20/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/inode.c 2003-05-07 18:08:03.000000000 +0800 +@@ -39,6 +39,18 @@ + */ + #undef SEARCH_FROM_ZERO + ++/* ++ * Test whether an inode is a fast symlink. ++ */ ++static inline int ext3_inode_is_fast_symlink(struct inode *inode) ++{ ++ int ea_blocks = inode->u.ext3_i.i_file_acl ? ++ (inode->i_sb->s_blocksize >> 9) : 0; ++ ++ return (S_ISLNK(inode->i_mode) && ++ inode->i_blocks - ea_blocks == 0); ++} ++ + /* The ext3 forget function must perform a revoke if we are freeing data + * which has been journaled. Metadata (eg. indirect blocks) must be + * revoked in all cases. +@@ -48,7 +60,7 @@ + * still needs to be revoked. 
+ */ + +-static int ext3_forget(handle_t *handle, int is_metadata, ++int ext3_forget(handle_t *handle, int is_metadata, + struct inode *inode, struct buffer_head *bh, + int blocknr) + { +@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i + { + handle_t *handle; + +- if (is_bad_inode(inode) || +- inode->i_ino == EXT3_ACL_IDX_INO || +- inode->i_ino == EXT3_ACL_DATA_INO) ++ if (is_bad_inode(inode)) + goto no_delete; + + lock_kernel(); +@@ -1855,6 +1865,8 @@ void ext3_truncate(struct inode * inode) + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; ++ if (ext3_inode_is_fast_symlink(inode)) ++ return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + +@@ -2002,8 +2014,6 @@ int ext3_get_inode_loc (struct inode *in + struct ext3_group_desc * gdp; + + if ((inode->i_ino != EXT3_ROOT_INO && +- inode->i_ino != EXT3_ACL_IDX_INO && +- inode->i_ino != EXT3_ACL_DATA_INO && + inode->i_ino != EXT3_JOURNAL_INO && + inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || + inode->i_ino > le32_to_cpu( +@@ -2130,10 +2140,7 @@ void ext3_read_inode(struct inode * inod + + brelse (iloc.bh); + +- if (inode->i_ino == EXT3_ACL_IDX_INO || +- inode->i_ino == EXT3_ACL_DATA_INO) +- /* Nothing to do */ ; +- else if (S_ISREG(inode->i_mode)) { ++ if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; + inode->i_mapping->a_ops = &ext3_aops; +@@ -2141,15 +2148,17 @@ void ext3_read_inode(struct inode * inod + inode->i_op = &ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { +- if (!inode->i_blocks) ++ if (ext3_inode_is_fast_symlink(inode)) + inode->i_op = &ext3_fast_symlink_inode_operations; + else { +- inode->i_op = &page_symlink_inode_operations; ++ inode->i_op = &ext3_symlink_inode_operations; + inode->i_mapping->a_ops = &ext3_aops; + } +- } else ++ } else { ++ inode->i_op = &ext3_special_inode_operations; + 
init_special_inode(inode, inode->i_mode, + le32_to_cpu(iloc.raw_inode->i_block[0])); ++ } + ext3_set_inode_flags(inode); + return; + +--- linux-2.4.20/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:05.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/namei.c 2003-05-07 18:08:03.000000000 +0800 +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1611,7 +1612,7 @@ static int ext3_mkdir(struct inode * dir + if (IS_SYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, S_IFDIR); ++ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; +@@ -1619,7 +1620,6 @@ static int ext3_mkdir(struct inode * dir + inode->i_op = &ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; + inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; +- inode->i_blocks = 0; + dir_block = ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { + inode->i_nlink--; /* is this nlink == 0? */ +@@ -1646,9 +1646,6 @@ static int ext3_mkdir(struct inode * dir + BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_block); + brelse (dir_block); +- inode->i_mode = S_IFDIR | mode; +- if (dir->i_mode & S_ISGID) +- inode->i_mode |= S_ISGID; + ext3_mark_inode_dirty(handle, inode); + err = ext3_add_entry (handle, dentry, inode); + if (err) { +@@ -2017,7 +2014,7 @@ static int ext3_symlink (struct inode * + goto out_stop; + + if (l > sizeof (EXT3_I(inode)->i_data)) { +- inode->i_op = &page_symlink_inode_operations; ++ inode->i_op = &ext3_symlink_inode_operations; + inode->i_mapping->a_ops = &ext3_aops; + /* + * block_symlink() calls back into ext3_prepare/commit_write. 
+@@ -2244,4 +2241,16 @@ struct inode_operations ext3_dir_inode_o + rmdir: ext3_rmdir, /* BKL held */ + mknod: ext3_mknod, /* BKL held */ + rename: ext3_rename, /* BKL held */ ++ setxattr: ext3_setxattr, /* BKL held */ ++ getxattr: ext3_getxattr, /* BKL held */ ++ listxattr: ext3_listxattr, /* BKL held */ ++ removexattr: ext3_removexattr, /* BKL held */ + }; ++ ++struct inode_operations ext3_special_inode_operations = { ++ setxattr: ext3_setxattr, /* BKL held */ ++ getxattr: ext3_getxattr, /* BKL held */ ++ listxattr: ext3_listxattr, /* BKL held */ ++ removexattr: ext3_removexattr, /* BKL held */ ++}; ++ +--- linux-2.4.20/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/super.c 2003-05-07 18:08:39.000000000 +0800 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block + kdev_t j_dev = sbi->s_journal->j_dev; + int i; + ++ ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { + EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); +@@ -499,6 +501,7 @@ static int parse_options (char * options + int is_remount) + { + unsigned long *mount_options = &sbi->s_mount_opt; ++ + uid_t *resuid = &sbi->s_resuid; + gid_t *resgid = &sbi->s_resgid; + char * this_char; +@@ -511,6 +514,13 @@ static int parse_options (char * options + this_char = strtok (NULL, ",")) { + if ((value = strchr (this_char, '=')) != NULL) + *value++ = 0; ++#ifdef CONFIG_EXT3_FS_XATTR_USER ++ if (!strcmp (this_char, "user_xattr")) ++ set_opt (*mount_options, XATTR_USER); ++ else if (!strcmp (this_char, "nouser_xattr")) ++ clear_opt (*mount_options, XATTR_USER); ++ else ++#endif + if (!strcmp (this_char, "bsddf")) + clear_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "nouid32")) { +@@ -928,6 +938,12 @@ struct super_block * ext3_read_super (st + sbi->s_mount_opt = 0; + sbi->s_resuid = 
EXT3_DEF_RESUID; + sbi->s_resgid = EXT3_DEF_RESGID; ++ ++ /* Default extended attribute flags */ ++#ifdef CONFIG_EXT3_FS_XATTR_USER ++ /* set_opt(sbi->s_mount_opt, XATTR_USER); */ ++#endif ++ + if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { + sb->s_dev = 0; + goto out_fail; +@@ -1767,17 +1783,29 @@ static DECLARE_FSTYPE_DEV(ext3_fs_type, + + static int __init init_ext3_fs(void) + { +- return register_filesystem(&ext3_fs_type); ++ int error = init_ext3_xattr(); ++ if (error) ++ return error; ++ error = init_ext3_xattr_user(); ++ if (error) ++ goto fail; ++ error = register_filesystem(&ext3_fs_type); ++ if (!error) ++ return 0; ++ ++ exit_ext3_xattr_user(); ++fail: ++ exit_ext3_xattr(); ++ return error; + } + + static void __exit exit_ext3_fs(void) + { + unregister_filesystem(&ext3_fs_type); ++ exit_ext3_xattr_user(); ++ exit_ext3_xattr(); + } + +-EXPORT_SYMBOL(ext3_force_commit); +-EXPORT_SYMBOL(ext3_bread); +- + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); + MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); + MODULE_LICENSE("GPL"); +--- linux-2.4.20/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54 2001-11-10 06:25:04.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/symlink.c 2003-05-07 18:08:03.000000000 +0800 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) + { +@@ -33,7 +34,20 @@ static int ext3_follow_link(struct dentr + return vfs_follow_link(nd, s); + } + ++struct inode_operations ext3_symlink_inode_operations = { ++ readlink: page_readlink, /* BKL not held. Don't need */ ++ follow_link: page_follow_link, /* BKL not held. 
Don't need */ ++ setxattr: ext3_setxattr, /* BKL held */ ++ getxattr: ext3_getxattr, /* BKL held */ ++ listxattr: ext3_listxattr, /* BKL held */ ++ removexattr: ext3_removexattr, /* BKL held */ ++}; ++ + struct inode_operations ext3_fast_symlink_inode_operations = { + readlink: ext3_readlink, /* BKL not held. Don't need */ + follow_link: ext3_follow_link, /* BKL not held. Don't need */ ++ setxattr: ext3_setxattr, /* BKL held */ ++ getxattr: ext3_getxattr, /* BKL held */ ++ listxattr: ext3_listxattr, /* BKL held */ ++ removexattr: ext3_removexattr, /* BKL held */ + }; +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/xattr.c 2003-05-07 18:09:23.000000000 +0800 +@@ -0,0 +1,1225 @@ ++/* ++ * linux/fs/ext3/xattr.c ++ * ++ * Copyright (C) 2001 by Andreas Gruenbacher, ++ * ++ * Fix by Harrison Xing . ++ * Ext3 code with a lot of help from Eric Jarman . ++ * Extended attributes for symlinks and special files added per ++ * suggestion of Luka Renko . ++ */ ++ ++/* ++ * Extended attributes are stored on disk blocks allocated outside of ++ * any inode. The i_file_acl field is then made to point to this allocated ++ * block. If all extended attributes of an inode are identical, these ++ * inodes may share the same extended attribute block. Such situations ++ * are automatically detected by keeping a cache of recent attribute block ++ * numbers and hashes over the block's contents in memory. ++ * ++ * ++ * Extended attribute block layout: ++ * ++ * +------------------+ ++ * | header | ++ * | entry 1 | | ++ * | entry 2 | | growing downwards ++ * | entry 3 | v ++ * | four null bytes | ++ * | . . . | ++ * | value 1 | ^ ++ * | value 3 | | growing upwards ++ * | value 2 | | ++ * +------------------+ ++ * ++ * The block header is followed by multiple entry descriptors. These entry ++ * descriptors are variable in size, and aligned to EXT3_XATTR_PAD ++ * byte boundaries.
The entry descriptors are sorted by attribute name, ++ * so that two extended attribute blocks can be compared efficiently. ++ * ++ * Attribute values are aligned to the end of the block, stored in ++ * no specific order. They are also padded to EXT3_XATTR_PAD byte ++ * boundaries. No additional gaps are left between them. ++ * ++ * Locking strategy ++ * ---------------- ++ * The VFS already holds the BKL and the inode->i_sem semaphore when any of ++ * the xattr inode operations are called, so we are guaranteed that only one ++ * process accesses extended attributes of an inode at any time. ++ * ++ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that ++ * only a single process is modifying an extended attribute block, even ++ * if the block is shared among inodes. ++ * ++ * Note for porting to 2.5 ++ * ----------------------- ++ * The BKL will no longer be held in the xattr inode operations. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define EXT3_EA_USER "user." ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) ++# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) ++#endif ++ ++#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) ++#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) ++#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) ++#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) ++ ++#ifdef EXT3_XATTR_DEBUG ++# define ea_idebug(inode, f...) do { \ ++ printk(KERN_DEBUG "inode %s:%ld: ", \ ++ kdevname(inode->i_dev), inode->i_ino); \ ++ printk(f); \ ++ printk("\n"); \ ++ } while (0) ++# define ea_bdebug(bh, f...) do { \ ++ printk(KERN_DEBUG "block %s:%ld: ", \ ++ kdevname(bh->b_dev), bh->b_blocknr); \ ++ printk(f); \ ++ printk("\n"); \ ++ } while (0) ++#else ++# define ea_idebug(f...) ++# define ea_bdebug(f...)
++#endif ++ ++static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, ++ struct ext3_xattr_header *); ++ ++#ifdef CONFIG_EXT3_FS_XATTR_SHARING ++ ++static int ext3_xattr_cache_insert(struct buffer_head *); ++static struct buffer_head *ext3_xattr_cache_find(struct inode *, ++ struct ext3_xattr_header *); ++static void ext3_xattr_cache_remove(struct buffer_head *); ++static void ext3_xattr_rehash(struct ext3_xattr_header *, ++ struct ext3_xattr_entry *); ++ ++static struct mb_cache *ext3_xattr_cache; ++ ++#else ++# define ext3_xattr_cache_insert(bh) 0 ++# define ext3_xattr_cache_find(inode, header) NULL ++# define ext3_xattr_cache_remove(bh) while(0) {} ++# define ext3_xattr_rehash(header, entry) while(0) {} ++#endif ++ ++/* ++ * If a file system does not share extended attributes among inodes, ++ * we should not need the ext3_xattr_sem semaphore. However, the ++ * filesystem may still contain shared blocks, so we always take ++ * the lock. ++ */ ++ ++DECLARE_MUTEX(ext3_xattr_sem); ++ ++static inline int ++ext3_xattr_new_block(handle_t *handle, struct inode *inode, ++ int * errp, int force) ++{ ++ struct super_block *sb = inode->i_sb; ++ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + ++ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); ++ ++ /* How can we enforce the allocation? */ ++ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); ++#ifdef OLD_QUOTAS ++ if (!*errp) ++ inode->i_blocks += inode->i_sb->s_blocksize >> 9; ++#endif ++ return block; ++} ++ ++static inline int ++ext3_xattr_quota_alloc(struct inode *inode, int force) ++{ ++ /* How can we enforce the allocation? 
*/ ++#ifdef OLD_QUOTAS ++ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); ++ if (!error) ++ inode->i_blocks += inode->i_sb->s_blocksize >> 9; ++#else ++ int error = DQUOT_ALLOC_BLOCK(inode, 1); ++#endif ++ return error; ++} ++ ++#ifdef OLD_QUOTAS ++ ++static inline void ++ext3_xattr_quota_free(struct inode *inode) ++{ ++ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); ++ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; ++} ++ ++static inline void ++ext3_xattr_free_block(handle_t *handle, struct inode * inode, ++ unsigned long block) ++{ ++ ext3_free_blocks(handle, inode, block, 1); ++ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; ++} ++ ++#else ++# define ext3_xattr_quota_free(inode) \ ++ DQUOT_FREE_BLOCK(inode, 1) ++# define ext3_xattr_free_block(handle, inode, block) \ ++ ext3_free_blocks(handle, inode, block, 1) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ++ ++static inline struct buffer_head * ++sb_bread(struct super_block *sb, int block) ++{ ++ return bread(sb->s_dev, block, sb->s_blocksize); ++} ++ ++static inline struct buffer_head * ++sb_getblk(struct super_block *sb, int block) ++{ ++ return getblk(sb->s_dev, block, sb->s_blocksize); ++} ++ ++#endif ++ ++struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; ++rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; ++ ++int ++ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) ++{ ++ int error = -EINVAL; ++ ++ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { ++ write_lock(&ext3_handler_lock); ++ if (!ext3_xattr_handlers[name_index-1]) { ++ ext3_xattr_handlers[name_index-1] = handler; ++ error = 0; ++ } ++ write_unlock(&ext3_handler_lock); ++ } ++ return error; ++} ++ ++void ++ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) ++{ ++ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { ++ write_lock(&ext3_handler_lock); ++ ext3_xattr_handlers[name_index-1] = NULL; ++ write_unlock(&ext3_handler_lock); ++ } ++} ++
++static inline const char * ++strcmp_prefix(const char *a, const char *a_prefix) ++{ ++ while (*a_prefix && *a == *a_prefix) { ++ a++; ++ a_prefix++; ++ } ++ return *a_prefix ? NULL : a; ++} ++ ++/* ++ * Decode the extended attribute name, and translate it into ++ * the name_index and name suffix. ++ */ ++static inline struct ext3_xattr_handler * ++ext3_xattr_resolve_name(const char **name) ++{ ++ struct ext3_xattr_handler *handler = NULL; ++ int i; ++ ++ if (!*name) ++ return NULL; ++ read_lock(&ext3_handler_lock); ++ for (i=0; i < EXT3_XATTR_INDEX_MAX; i++) { ++ if (ext3_xattr_handlers[i]) { ++ const char *n = strcmp_prefix(*name, ++ ext3_xattr_handlers[i]->prefix); ++ if (n) { ++ handler = ext3_xattr_handlers[i]; ++ *name = n; ++ break; ++ } ++ } ++ } ++ read_unlock(&ext3_handler_lock); ++ return handler; ++} ++ ++static inline struct ext3_xattr_handler * ++ext3_xattr_handler(int name_index) ++{ ++ struct ext3_xattr_handler *handler = NULL; ++ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { ++ read_lock(&ext3_handler_lock); ++ handler = ext3_xattr_handlers[name_index-1]; ++ read_unlock(&ext3_handler_lock); ++ } ++ return handler; ++} ++ ++/* ++ * Inode operation getxattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++ssize_t ++ext3_getxattr(struct dentry *dentry, const char *name, ++ void *buffer, size_t size) ++{ ++ struct ext3_xattr_handler *handler; ++ struct inode *inode = dentry->d_inode; ++ ++ handler = ext3_xattr_resolve_name(&name); ++ if (!handler) ++ return -ENOTSUP; ++ return handler->get(inode, name, buffer, size); ++} ++ ++/* ++ * Inode operation listxattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++ssize_t ++ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) ++{ ++ return ext3_xattr_list(dentry->d_inode, buffer, size); ++} ++ ++/* ++ * Inode operation setxattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++int ++ext3_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ struct ext3_xattr_handler *handler; ++
struct inode *inode = dentry->d_inode; ++ ++ if (size == 0) ++ value = ""; /* empty EA, do not remove */ ++ handler = ext3_xattr_resolve_name(&name); ++ if (!handler) ++ return -ENOTSUP; ++ return handler->set(inode, name, value, size, flags); ++} ++ ++/* ++ * Inode operation removexattr() ++ * ++ * dentry->d_inode->i_sem down ++ * BKL held [before 2.5.x] ++ */ ++int ++ext3_removexattr(struct dentry *dentry, const char *name) ++{ ++ struct ext3_xattr_handler *handler; ++ struct inode *inode = dentry->d_inode; ++ ++ handler = ext3_xattr_resolve_name(&name); ++ if (!handler) ++ return -ENOTSUP; ++ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); ++} ++ ++/* ++ * ext3_xattr_get() ++ * ++ * Copy an extended attribute into the buffer ++ * provided, or compute the buffer size required. ++ * Buffer is NULL to compute the size of the buffer required. ++ * ++ * Returns a negative error number on failure, or the number of bytes ++ * used / required on success. ++ */ ++int ++ext3_xattr_get(struct inode *inode, int name_index, const char *name, ++ void *buffer, size_t buffer_size) ++{ ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_entry *entry; ++ unsigned int block, size; ++ char *end; ++ int name_len, error; ++ ++ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", ++ name_index, name, buffer, (long)buffer_size); ++ ++ if (name == NULL) ++ return -EINVAL; ++ if (!EXT3_I(inode)->i_file_acl) ++ return -ENOATTR; ++ block = EXT3_I(inode)->i_file_acl; ++ ea_idebug(inode, "reading block %d", block); ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) ++ return -EIO; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); ++ end = bh->b_data + bh->b_size; ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* 
find named attribute */ ++ name_len = strlen(name); ++ ++ error = -ERANGE; ++ if (name_len > 255) ++ goto cleanup; ++ entry = FIRST_ENTRY(bh); ++ while (!IS_LAST_ENTRY(entry)) { ++ struct ext3_xattr_entry *next = ++ EXT3_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (name_index == entry->e_name_index && ++ name_len == entry->e_name_len && ++ memcmp(name, entry->e_name, name_len) == 0) ++ goto found; ++ entry = next; ++ } ++ /* Check the remaining name entries */ ++ while (!IS_LAST_ENTRY(entry)) { ++ struct ext3_xattr_entry *next = ++ EXT3_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ entry = next; ++ } ++ if (ext3_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ error = -ENOATTR; ++ goto cleanup; ++found: ++ /* check the buffer size */ ++ if (entry->e_value_block != 0) ++ goto bad_block; ++ size = le32_to_cpu(entry->e_value_size); ++ if (size > inode->i_sb->s_blocksize || ++ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) ++ goto bad_block; ++ ++ if (ext3_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ if (buffer) { ++ error = -ERANGE; ++ if (size > buffer_size) ++ goto cleanup; ++ /* return value of attribute */ ++ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), ++ size); ++ } ++ error = size; ++ ++cleanup: ++ brelse(bh); ++ ++ return error; ++} ++ ++/* ++ * ext3_xattr_list() ++ * ++ * Copy a list of attribute names into the buffer ++ * provided, or compute the buffer size required. ++ * Buffer is NULL to compute the size of the buffer required. ++ * ++ * Returns a negative error number on failure, or the number of bytes ++ * used / required on success. 
++ */ ++int ++ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ++{ ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_entry *entry; ++ unsigned int block, size = 0; ++ char *buf, *end; ++ int error; ++ ++ ea_idebug(inode, "buffer=%p, buffer_size=%ld", ++ buffer, (long)buffer_size); ++ ++ if (!EXT3_I(inode)->i_file_acl) ++ return 0; ++ block = EXT3_I(inode)->i_file_acl; ++ ea_idebug(inode, "reading block %d", block); ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) ++ return -EIO; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); ++ end = bh->b_data + bh->b_size; ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* compute the size required for the list of attribute names */ ++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); ++ entry = EXT3_XATTR_NEXT(entry)) { ++ struct ext3_xattr_handler *handler; ++ struct ext3_xattr_entry *next = ++ EXT3_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ ++ handler = ext3_xattr_handler(entry->e_name_index); ++ if (handler) ++ size += handler->list(NULL, inode, entry->e_name, ++ entry->e_name_len); ++ } ++ ++ if (ext3_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ if (!buffer) { ++ error = size; ++ goto cleanup; ++ } else { ++ error = -ERANGE; ++ if (size > buffer_size) ++ goto cleanup; ++ } ++ ++ /* list the attribute names */ ++ buf = buffer; ++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); ++ entry = EXT3_XATTR_NEXT(entry)) { ++ struct ext3_xattr_handler *handler; ++ ++ handler = ext3_xattr_handler(entry->e_name_index); ++ if (handler) ++ buf += handler->list(buf, inode, entry->e_name, ++ entry->e_name_len); ++ } ++ error = size; ++ ++cleanup: ++ brelse(bh); ++ ++ return error; ++} ++ ++/* 
++ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is ++ * not set, set it. ++ */ ++static void ext3_xattr_update_super_block(handle_t *handle, ++ struct super_block *sb) ++{ ++ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) ++ return; ++ ++ lock_super(sb); ++ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) ++ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; ++#endif ++ EXT3_SB(sb)->s_es->s_feature_compat |= ++ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); ++ sb->s_dirt = 1; ++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); ++ unlock_super(sb); ++} ++ ++/* ++ * ext3_xattr_set() ++ * ++ * Create, replace or remove an extended attribute for this inode. Buffer ++ * is NULL to remove an existing extended attribute, and non-NULL to ++ * either replace an existing extended attribute, or create a new extended ++ * attribute. The flags XATTR_REPLACE and XATTR_CREATE ++ * specify that an extended attribute must exist and must not exist ++ * previous to the call, respectively. ++ * ++ * Returns 0, or a negative error number on failure. ++ */ ++int ++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, ++ const char *name, const void *value, size_t value_len, int flags) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_header *header = NULL; ++ struct ext3_xattr_entry *here, *last; ++ unsigned int name_len; ++ int block = EXT3_I(inode)->i_file_acl; ++ int min_offs = sb->s_blocksize, not_found = 1, free, error; ++ char *end; ++ ++ /* ++ * header -- Points either into bh, or to a temporarily ++ * allocated buffer. ++ * here -- The named entry found, or the place for inserting, within ++ * the block pointed to by header. ++ * last -- Points right after the last named entry within the block ++ * pointed to by header. 
++ * min_offs -- The offset of the first value (values are aligned ++ * towards the end of the block). ++ * end -- Points right after the block pointed to by header. ++ */ ++ ++ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", ++ name_index, name, value, (long)value_len); ++ ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) ++ return -EPERM; ++ if (value == NULL) ++ value_len = 0; ++ if (name == NULL) ++ return -EINVAL; ++ name_len = strlen(name); ++ if (name_len > 255 || value_len > sb->s_blocksize) ++ return -ERANGE; ++ down(&ext3_xattr_sem); ++ ++ if (block) { ++ /* The inode already has an extended attribute block. */ ++ bh = sb_bread(sb, block); ++ error = -EIO; ++ if (!bh) ++ goto cleanup; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), ++ le32_to_cpu(HDR(bh)->h_refcount)); ++ header = HDR(bh); ++ end = bh->b_data + bh->b_size; ++ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ header->h_blocks != cpu_to_le32(1)) { ++bad_block: ext3_error(sb, "ext3_xattr_set", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* Find the named attribute. */ ++ here = FIRST_ENTRY(bh); ++ while (!IS_LAST_ENTRY(here)) { ++ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (!here->e_value_block && here->e_value_size) { ++ int offs = le16_to_cpu(here->e_value_offs); ++ if (offs < min_offs) ++ min_offs = offs; ++ } ++ not_found = name_index - here->e_name_index; ++ if (!not_found) ++ not_found = name_len - here->e_name_len; ++ if (!not_found) ++ not_found = memcmp(name, here->e_name,name_len); ++ if (not_found <= 0) ++ break; ++ here = next; ++ } ++ last = here; ++ /* We still need to compute min_offs and last. 
*/ ++ while (!IS_LAST_ENTRY(last)) { ++ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (!last->e_value_block && last->e_value_size) { ++ int offs = le16_to_cpu(last->e_value_offs); ++ if (offs < min_offs) ++ min_offs = offs; ++ } ++ last = next; ++ } ++ ++ /* Check whether we have enough space left. */ ++ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); ++ } else { ++ /* We will use a new extended attribute block. */ ++ free = sb->s_blocksize - ++ sizeof(struct ext3_xattr_header) - sizeof(__u32); ++ here = last = NULL; /* avoid gcc uninitialized warning. */ ++ } ++ ++ if (not_found) { ++ /* Request to remove a nonexistent attribute? */ ++ error = -ENOATTR; ++ if (flags & XATTR_REPLACE) ++ goto cleanup; ++ error = 0; ++ if (value == NULL) ++ goto cleanup; ++ else ++ free -= EXT3_XATTR_LEN(name_len); ++ } else { ++ /* Request to create an existing attribute? */ ++ error = -EEXIST; ++ if (flags & XATTR_CREATE) ++ goto cleanup; ++ if (!here->e_value_block && here->e_value_size) { ++ unsigned int size = le32_to_cpu(here->e_value_size); ++ ++ if (le16_to_cpu(here->e_value_offs) + size > ++ sb->s_blocksize || size > sb->s_blocksize) ++ goto bad_block; ++ free += EXT3_XATTR_SIZE(size); ++ } ++ } ++ free -= EXT3_XATTR_SIZE(value_len); ++ error = -ENOSPC; ++ if (free < 0) ++ goto cleanup; ++ ++ /* Here we know that we can set the new attribute. 
*/ ++ ++ if (header) { ++ if (header->h_refcount == cpu_to_le32(1)) { ++ ea_bdebug(bh, "modifying in-place"); ++ ext3_xattr_cache_remove(bh); ++ error = ext3_journal_get_write_access(handle, bh); ++ if (error) ++ goto cleanup; ++ } else { ++ int offset; ++ ++ ea_bdebug(bh, "cloning"); ++ header = kmalloc(bh->b_size, GFP_KERNEL); ++ error = -ENOMEM; ++ if (header == NULL) ++ goto cleanup; ++ memcpy(header, HDR(bh), bh->b_size); ++ header->h_refcount = cpu_to_le32(1); ++ offset = (char *)header - bh->b_data; ++ here = ENTRY((char *)here + offset); ++ last = ENTRY((char *)last + offset); ++ } ++ } else { ++ /* Allocate a buffer where we construct the new block. */ ++ header = kmalloc(sb->s_blocksize, GFP_KERNEL); ++ error = -ENOMEM; ++ if (header == NULL) ++ goto cleanup; ++ memset(header, 0, sb->s_blocksize); ++ end = (char *)header + sb->s_blocksize; ++ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); ++ header->h_blocks = header->h_refcount = cpu_to_le32(1); ++ last = here = ENTRY(header+1); ++ } ++ ++ if (not_found) { ++ /* Insert the new name. */ ++ int size = EXT3_XATTR_LEN(name_len); ++ int rest = (char *)last - (char *)here; ++ memmove((char *)here + size, here, rest); ++ memset(here, 0, size); ++ here->e_name_index = name_index; ++ here->e_name_len = name_len; ++ memcpy(here->e_name, name, name_len); ++ } else { ++ /* Remove the old value. */ ++ if (!here->e_value_block && here->e_value_size) { ++ char *first_val = (char *)header + min_offs; ++ int offs = le16_to_cpu(here->e_value_offs); ++ char *val = (char *)header + offs; ++ size_t size = EXT3_XATTR_SIZE( ++ le32_to_cpu(here->e_value_size)); ++ memmove(first_val + size, first_val, val - first_val); ++ memset(first_val, 0, size); ++ here->e_value_offs = 0; ++ min_offs += size; ++ ++ /* Adjust all value offsets. 
*/ ++ last = ENTRY(header+1); ++ while (!IS_LAST_ENTRY(last)) { ++ int o = le16_to_cpu(last->e_value_offs); ++ if (!last->e_value_block && o < offs) ++ last->e_value_offs = ++ cpu_to_le16(o + size); ++ last = EXT3_XATTR_NEXT(last); ++ } ++ } ++ if (value == NULL) { ++ /* Remove this attribute. */ ++ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { ++ /* This block is now empty. */ ++ error = ext3_xattr_set2(handle, inode, bh,NULL); ++ goto cleanup; ++ } else { ++ /* Remove the old name. */ ++ int size = EXT3_XATTR_LEN(name_len); ++ last = ENTRY((char *)last - size); ++ memmove(here, (char*)here + size, ++ (char*)last - (char*)here); ++ memset(last, 0, size); ++ } ++ } ++ } ++ ++ if (value != NULL) { ++ /* Insert the new value. */ ++ here->e_value_size = cpu_to_le32(value_len); ++ if (value_len) { ++ size_t size = EXT3_XATTR_SIZE(value_len); ++ char *val = (char *)header + min_offs - size; ++ here->e_value_offs = ++ cpu_to_le16((char *)val - (char *)header); ++ memset(val + size - EXT3_XATTR_PAD, 0, ++ EXT3_XATTR_PAD); /* Clear the pad bytes. */ ++ memcpy(val, value, value_len); ++ } ++ } ++ ext3_xattr_rehash(header, here); ++ ++ error = ext3_xattr_set2(handle, inode, bh, header); ++ ++cleanup: ++ brelse(bh); ++ if (!(bh && header == HDR(bh))) ++ kfree(header); ++ up(&ext3_xattr_sem); ++ ++ return error; ++} ++ ++/* ++ * Second half of ext3_xattr_set(): Update the file system. ++ */ ++static int ++ext3_xattr_set2(handle_t *handle, struct inode *inode, ++ struct buffer_head *old_bh, struct ext3_xattr_header *header) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *new_bh = NULL; ++ int error; ++ ++ if (header) { ++ new_bh = ext3_xattr_cache_find(inode, header); ++ if (new_bh) { ++ /* ++ * We found an identical block in the cache. ++ * The old block will be released after updating ++ * the inode. 
++ */ ++ ea_bdebug(old_bh, "reusing block %ld", ++ new_bh->b_blocknr); ++ ++ error = -EDQUOT; ++ if (ext3_xattr_quota_alloc(inode, 1)) ++ goto cleanup; ++ ++ error = ext3_journal_get_write_access(handle, new_bh); ++ if (error) ++ goto cleanup; ++ HDR(new_bh)->h_refcount = cpu_to_le32( ++ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ++ ea_bdebug(new_bh, "refcount now=%d", ++ le32_to_cpu(HDR(new_bh)->h_refcount)); ++ } else if (old_bh && header == HDR(old_bh)) { ++ /* Keep this block. */ ++ new_bh = old_bh; ++ ext3_xattr_cache_insert(new_bh); ++ } else { ++ /* We need to allocate a new block */ ++ int force = EXT3_I(inode)->i_file_acl != 0; ++ int block = ext3_xattr_new_block(handle, inode, ++ &error, force); ++ if (error) ++ goto cleanup; ++ ea_idebug(inode, "creating block %d", block); ++ ++ new_bh = sb_getblk(sb, block); ++ if (!new_bh) { ++getblk_failed: ext3_xattr_free_block(handle, inode, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(new_bh); ++ error = ext3_journal_get_create_access(handle, new_bh); ++ if (error) { ++ unlock_buffer(new_bh); ++ goto getblk_failed; ++ } ++ memcpy(new_bh->b_data, header, new_bh->b_size); ++ mark_buffer_uptodate(new_bh, 1); ++ unlock_buffer(new_bh); ++ ext3_xattr_cache_insert(new_bh); ++ ++ ext3_xattr_update_super_block(handle, sb); ++ } ++ error = ext3_journal_dirty_metadata(handle, new_bh); ++ if (error) ++ goto cleanup; ++ } ++ ++ /* Update the inode. */ ++ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; ++ inode->i_ctime = CURRENT_TIME; ++ ext3_mark_inode_dirty(handle, inode); ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ ++ error = 0; ++ if (old_bh && old_bh != new_bh) { ++ /* ++ * If there was an old block, and we are not still using it, ++ * we now release the old block. ++ */ ++ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); ++ ++ error = ext3_journal_get_write_access(handle, old_bh); ++ if (error) ++ goto cleanup; ++ if (refcount == 1) { ++ /* Free the old block. 
*/ ++ ea_bdebug(old_bh, "freeing"); ++ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); ++ ++ /* ext3_forget() calls bforget() for us, but we ++ let our caller release old_bh, so we need to ++ duplicate the handle before. */ ++ get_bh(old_bh); ++ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); ++ } else { ++ /* Decrement the refcount only. */ ++ refcount--; ++ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); ++ ext3_xattr_quota_free(inode); ++ ext3_journal_dirty_metadata(handle, old_bh); ++ ea_bdebug(old_bh, "refcount now=%d", refcount); ++ } ++ } ++ ++cleanup: ++ if (old_bh != new_bh) ++ brelse(new_bh); ++ ++ return error; ++} ++ ++/* ++ * ext3_xattr_delete_inode() ++ * ++ * Free extended attribute resources associated with this inode. This ++ * is called immediately before an inode is freed. ++ */ ++void ++ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) ++{ ++ struct buffer_head *bh; ++ unsigned int block = EXT3_I(inode)->i_file_acl; ++ ++ if (!block) ++ return; ++ down(&ext3_xattr_sem); ++ ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) { ++ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", ++ "inode %ld: block %d read error", inode->i_ino, block); ++ goto cleanup; ++ } ++ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ goto cleanup; ++ } ++ ext3_journal_get_write_access(handle, bh); ++ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); ++ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ++ ext3_xattr_cache_remove(bh); ++ ext3_xattr_free_block(handle, inode, block); ++ ext3_forget(handle, 1, inode, bh, block); ++ bh = NULL; ++ } else { ++ HDR(bh)->h_refcount = cpu_to_le32( ++ le32_to_cpu(HDR(bh)->h_refcount) - 1); ++ ext3_journal_dirty_metadata(handle, bh); ++ if (IS_SYNC(inode)) ++ 
handle->h_sync = 1; ++ ext3_xattr_quota_free(inode); ++ } ++ EXT3_I(inode)->i_file_acl = 0; ++ ++cleanup: ++ brelse(bh); ++ up(&ext3_xattr_sem); ++} ++ ++/* ++ * ext3_xattr_put_super() ++ * ++ * This is called when a file system is unmounted. ++ */ ++void ++ext3_xattr_put_super(struct super_block *sb) ++{ ++#ifdef CONFIG_EXT3_FS_XATTR_SHARING ++ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); ++#endif ++} ++ ++#ifdef CONFIG_EXT3_FS_XATTR_SHARING ++ ++/* ++ * ext3_xattr_cache_insert() ++ * ++ * Create a new entry in the extended attribute cache, and insert ++ * it unless such an entry is already in the cache. ++ * ++ * Returns 0, or a negative error number on failure. ++ */ ++static int ++ext3_xattr_cache_insert(struct buffer_head *bh) ++{ ++ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); ++ struct mb_cache_entry *ce; ++ int error; ++ ++ ce = mb_cache_entry_alloc(ext3_xattr_cache); ++ if (!ce) ++ return -ENOMEM; ++ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); ++ if (error) { ++ mb_cache_entry_free(ce); ++ if (error == -EBUSY) { ++ ea_bdebug(bh, "already in cache (%d cache entries)", ++ atomic_read(&ext3_xattr_cache->c_entry_count)); ++ error = 0; ++ } ++ } else { ++ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, ++ atomic_read(&ext3_xattr_cache->c_entry_count)); ++ mb_cache_entry_release(ce); ++ } ++ return error; ++} ++ ++/* ++ * ext3_xattr_cmp() ++ * ++ * Compare two extended attribute blocks for equality. ++ * ++ * Returns 0 if the blocks are equal, 1 if they differ, and ++ * a negative error number on errors. 
++ */ ++static int ++ext3_xattr_cmp(struct ext3_xattr_header *header1, ++ struct ext3_xattr_header *header2) ++{ ++ struct ext3_xattr_entry *entry1, *entry2; ++ ++ entry1 = ENTRY(header1+1); ++ entry2 = ENTRY(header2+1); ++ while (!IS_LAST_ENTRY(entry1)) { ++ if (IS_LAST_ENTRY(entry2)) ++ return 1; ++ if (entry1->e_hash != entry2->e_hash || ++ entry1->e_name_len != entry2->e_name_len || ++ entry1->e_value_size != entry2->e_value_size || ++ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) ++ return 1; ++ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) ++ return -EIO; ++ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), ++ (char *)header2 + le16_to_cpu(entry2->e_value_offs), ++ le32_to_cpu(entry1->e_value_size))) ++ return 1; ++ ++ entry1 = EXT3_XATTR_NEXT(entry1); ++ entry2 = EXT3_XATTR_NEXT(entry2); ++ } ++ if (!IS_LAST_ENTRY(entry2)) ++ return 1; ++ return 0; ++} ++ ++/* ++ * ext3_xattr_cache_find() ++ * ++ * Find an identical extended attribute block. ++ * ++ * Returns a pointer to the block found, or NULL if such a block was ++ * not found or an error occurred. 
++ */ ++static struct buffer_head * ++ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) ++{ ++ __u32 hash = le32_to_cpu(header->h_hash); ++ struct mb_cache_entry *ce; ++ ++ if (!header->h_hash) ++ return NULL; /* never share */ ++ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); ++ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); ++ while (ce) { ++ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); ++ ++ if (!bh) { ++ ext3_error(inode->i_sb, "ext3_xattr_cache_find", ++ "inode %ld: block %ld read error", ++ inode->i_ino, ce->e_block); ++ } else if (le32_to_cpu(HDR(bh)->h_refcount) > ++ EXT3_XATTR_REFCOUNT_MAX) { ++ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, ++ le32_to_cpu(HDR(bh)->h_refcount), ++ EXT3_XATTR_REFCOUNT_MAX); ++ } else if (!ext3_xattr_cmp(header, HDR(bh))) { ++ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); ++ mb_cache_entry_release(ce); ++ return bh; ++ } ++ brelse(bh); ++ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); ++ } ++ return NULL; ++} ++ ++/* ++ * ext3_xattr_cache_remove() ++ * ++ * Remove the cache entry of a block from the cache. Called when a ++ * block becomes invalid. ++ */ ++static void ++ext3_xattr_cache_remove(struct buffer_head *bh) ++{ ++ struct mb_cache_entry *ce; ++ ++ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); ++ if (ce) { ++ ea_bdebug(bh, "removing (%d cache entries remaining)", ++ atomic_read(&ext3_xattr_cache->c_entry_count)-1); ++ mb_cache_entry_free(ce); ++ } else ++ ea_bdebug(bh, "no cache entry"); ++} ++ ++#define NAME_HASH_SHIFT 5 ++#define VALUE_HASH_SHIFT 16 ++ ++/* ++ * ext3_xattr_hash_entry() ++ * ++ * Compute the hash of an extended attribute. 
++ */ ++static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, ++ struct ext3_xattr_entry *entry) ++{ ++ __u32 hash = 0; ++ char *name = entry->e_name; ++ int n; ++ ++ for (n=0; n < entry->e_name_len; n++) { ++ hash = (hash << NAME_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ ++ *name++; ++ } ++ ++ if (entry->e_value_block == 0 && entry->e_value_size != 0) { ++ __u32 *value = (__u32 *)((char *)header + ++ le16_to_cpu(entry->e_value_offs)); ++ for (n = (le32_to_cpu(entry->e_value_size) + ++ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { ++ hash = (hash << VALUE_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ ++ le32_to_cpu(*value++); ++ } ++ } ++ entry->e_hash = cpu_to_le32(hash); ++} ++ ++#undef NAME_HASH_SHIFT ++#undef VALUE_HASH_SHIFT ++ ++#define BLOCK_HASH_SHIFT 16 ++ ++/* ++ * ext3_xattr_rehash() ++ * ++ * Re-compute the extended attribute hash value after an entry has changed. ++ */ ++static void ext3_xattr_rehash(struct ext3_xattr_header *header, ++ struct ext3_xattr_entry *entry) ++{ ++ struct ext3_xattr_entry *here; ++ __u32 hash = 0; ++ ++ ext3_xattr_hash_entry(header, entry); ++ here = ENTRY(header+1); ++ while (!IS_LAST_ENTRY(here)) { ++ if (!here->e_hash) { ++ /* Block is not shared if an entry's hash value == 0 */ ++ hash = 0; ++ break; ++ } ++ hash = (hash << BLOCK_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ ++ le32_to_cpu(here->e_hash); ++ here = EXT3_XATTR_NEXT(here); ++ } ++ header->h_hash = cpu_to_le32(hash); ++} ++ ++#undef BLOCK_HASH_SHIFT ++ ++int __init ++init_ext3_xattr(void) ++{ ++ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, ++ sizeof(struct mb_cache_entry) + ++ sizeof(struct mb_cache_entry_index), 1, 61); ++ if (!ext3_xattr_cache) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++void ++exit_ext3_xattr(void) ++{ ++ if (ext3_xattr_cache) ++ mb_cache_destroy(ext3_xattr_cache); ++ ext3_xattr_cache = NULL; ++} ++ ++#else /* CONFIG_EXT3_FS_XATTR_SHARING */ ++ 
++int __init ++init_ext3_xattr(void) ++{ ++ return 0; ++} ++ ++void ++exit_ext3_xattr(void) ++{ ++} ++ ++#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/fs/ext3/xattr_user.c 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,111 @@ ++/* ++ * linux/fs/ext3/xattr_user.c ++ * Handler for extended user attributes. ++ * ++ * Copyright (C) 2001 by Andreas Gruenbacher, ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_EXT3_FS_POSIX_ACL ++# include ++#endif ++ ++#define XATTR_USER_PREFIX "user." ++ ++static size_t ++ext3_xattr_user_list(char *list, struct inode *inode, ++ const char *name, int name_len) ++{ ++ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; ++ ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return 0; ++ ++ if (list) { ++ memcpy(list, XATTR_USER_PREFIX, prefix_len); ++ memcpy(list+prefix_len, name, name_len); ++ list[prefix_len + name_len] = '\0'; ++ } ++ return prefix_len + name_len + 1; ++} ++ ++static int ++ext3_xattr_user_get(struct inode *inode, const char *name, ++ void *buffer, size_t size) ++{ ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return -ENOTSUP; ++#ifdef CONFIG_EXT3_FS_POSIX_ACL ++ error = ext3_permission_locked(inode, MAY_READ); ++#else ++ error = permission(inode, MAY_READ); ++#endif ++ if (error) ++ return error; ++ ++ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, ++ buffer, size); ++} ++ ++static int ++ext3_xattr_user_set(struct inode *inode, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ handle_t *handle; ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return -ENOTSUP; ++ if ( !S_ISREG(inode->i_mode) && ++ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) ++ return -EPERM; ++#ifdef CONFIG_EXT3_FS_POSIX_ACL ++ error = ext3_permission_locked(inode, MAY_WRITE); 
++#else ++ error = permission(inode, MAY_WRITE); ++#endif ++ if (error) ++ return error; ++ ++ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, ++ value, size, flags); ++ ext3_journal_stop(handle, inode); ++ ++ return error; ++} ++ ++struct ext3_xattr_handler ext3_xattr_user_handler = { ++ prefix: XATTR_USER_PREFIX, ++ list: ext3_xattr_user_list, ++ get: ext3_xattr_user_get, ++ set: ext3_xattr_user_set, ++}; ++ ++int __init ++init_ext3_xattr_user(void) ++{ ++ return ext3_xattr_register(EXT3_XATTR_INDEX_USER, ++ &ext3_xattr_user_handler); ++} ++ ++void ++exit_ext3_xattr_user(void) ++{ ++ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, ++ &ext3_xattr_user_handler); ++} +--- linux-2.4.20/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/jfs/jfs_xattr.h 2003-05-07 18:08:03.000000000 +0800 +@@ -52,8 +52,10 @@ struct jfs_ea_list { + #define END_EALIST(ealist) \ + ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) + +-extern int __jfs_setxattr(struct inode *, const char *, void *, size_t, int); +-extern int jfs_setxattr(struct dentry *, const char *, void *, size_t, int); ++extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, ++ int); ++extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, ++ int); + extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); + extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); + extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); +--- linux-2.4.20/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/fs/jfs/xattr.c 2003-05-07 18:08:03.000000000 +0800 +@@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s + } + + static int can_set_xattr(struct inode *inode, const char *name, +- void *value, 
size_t value_len) ++ const void *value, size_t value_len) + { + if (IS_RDONLY(inode)) + return -EROFS; +@@ -660,7 +660,7 @@ static int can_set_xattr(struct inode *i + return permission(inode, MAY_WRITE); + } + +-int __jfs_setxattr(struct inode *inode, const char *name, void *value, ++int __jfs_setxattr(struct inode *inode, const char *name, const void *value, + size_t value_len, int flags) + { + struct jfs_ea_list *ealist; +@@ -799,7 +799,7 @@ int __jfs_setxattr(struct inode *inode, + return rc; + } + +-int jfs_setxattr(struct dentry *dentry, const char *name, void *value, ++int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t value_len, int flags) + { + if (value == NULL) { /* empty EA, do not remove */ +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/fs/mbcache.c 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,648 @@ ++/* ++ * linux/fs/mbcache.c ++ * (C) 2001-2002 Andreas Gruenbacher, ++ */ ++ ++/* ++ * Filesystem Meta Information Block Cache (mbcache) ++ * ++ * The mbcache caches blocks of block devices that need to be located ++ * by their device/block number, as well as by other criteria (such ++ * as the block's contents). ++ * ++ * There can only be one cache entry in a cache per device and block number. ++ * Additional indexes need not be unique in this sense. The number of ++ * additional indexes (=other criteria) can be hardwired at compile time ++ * or specified at cache create time. ++ * ++ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' ++ * in the cache. A valid entry is in the main hash tables of the cache, ++ * and may also be in the lru list. An invalid entry is not in any hashes ++ * or lists. ++ * ++ * A valid cache entry is only in the lru list if no handles refer to it. ++ * Invalid cache entries will be freed when the last handle to the cache ++ * entry is released. Entries that cannot be freed immediately are put ++ * back on the lru list. 
++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#ifdef MB_CACHE_DEBUG ++# define mb_debug(f...) do { \ ++ printk(KERN_DEBUG f); \ ++ printk("\n"); \ ++ } while (0) ++#define mb_assert(c) do { if (!(c)) \ ++ printk(KERN_ERR "assertion " #c " failed\n"); \ ++ } while(0) ++#else ++# define mb_debug(f...) do { } while(0) ++# define mb_assert(c) do { } while(0) ++#endif ++#define mb_error(f...) do { \ ++ printk(KERN_ERR f); \ ++ printk("\n"); \ ++ } while(0) ++ ++MODULE_AUTHOR("Andreas Gruenbacher "); ++MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) ++MODULE_LICENSE("GPL"); ++#endif ++ ++EXPORT_SYMBOL(mb_cache_create); ++EXPORT_SYMBOL(mb_cache_shrink); ++EXPORT_SYMBOL(mb_cache_destroy); ++EXPORT_SYMBOL(mb_cache_entry_alloc); ++EXPORT_SYMBOL(mb_cache_entry_insert); ++EXPORT_SYMBOL(mb_cache_entry_release); ++EXPORT_SYMBOL(mb_cache_entry_takeout); ++EXPORT_SYMBOL(mb_cache_entry_free); ++EXPORT_SYMBOL(mb_cache_entry_dup); ++EXPORT_SYMBOL(mb_cache_entry_get); ++#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) ++EXPORT_SYMBOL(mb_cache_entry_find_first); ++EXPORT_SYMBOL(mb_cache_entry_find_next); ++#endif ++ ++ ++/* ++ * Global data: list of all mbcache's, lru list, and a spinlock for ++ * accessing cache data structures on SMP machines. The lru list is ++ * global across all mbcaches. ++ */ ++ ++static LIST_HEAD(mb_cache_list); ++static LIST_HEAD(mb_cache_lru_list); ++static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; ++ ++static inline int ++mb_cache_indexes(struct mb_cache *cache) ++{ ++#ifdef MB_CACHE_INDEXES_COUNT ++ return MB_CACHE_INDEXES_COUNT; ++#else ++ return cache->c_indexes_count; ++#endif ++} ++ ++/* ++ * What the mbcache registers as to get shrunk dynamically. 
++ */ ++ ++static void ++mb_cache_memory_pressure(int priority, unsigned int gfp_mask); ++ ++static struct cache_definition mb_cache_definition = { ++ "mb_cache", ++ mb_cache_memory_pressure ++}; ++ ++ ++static inline int ++__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) ++{ ++ return !list_empty(&ce->e_block_list); ++} ++ ++ ++static inline void ++__mb_cache_entry_unhash(struct mb_cache_entry *ce) ++{ ++ int n; ++ ++ if (__mb_cache_entry_is_hashed(ce)) { ++ list_del_init(&ce->e_block_list); ++ for (n=0; ne_cache); n++) ++ list_del(&ce->e_indexes[n].o_list); ++ } ++} ++ ++ ++static inline void ++__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) ++{ ++ struct mb_cache *cache = ce->e_cache; ++ ++ mb_assert(atomic_read(&ce->e_used) == 0); ++ if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { ++ /* free failed -- put back on the lru list ++ for freeing later. */ ++ spin_lock(&mb_cache_spinlock); ++ list_add(&ce->e_lru_list, &mb_cache_lru_list); ++ spin_unlock(&mb_cache_spinlock); ++ } else { ++ kmem_cache_free(cache->c_entry_cache, ce); ++ atomic_dec(&cache->c_entry_count); ++ } ++} ++ ++ ++static inline void ++__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) ++{ ++ if (atomic_dec_and_test(&ce->e_used)) { ++ if (__mb_cache_entry_is_hashed(ce)) ++ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); ++ else { ++ spin_unlock(&mb_cache_spinlock); ++ __mb_cache_entry_forget(ce, GFP_KERNEL); ++ return; ++ } ++ } ++ spin_unlock(&mb_cache_spinlock); ++} ++ ++ ++/* ++ * mb_cache_memory_pressure() memory pressure callback ++ * ++ * This function is called by the kernel memory management when memory ++ * gets low. 
++ * ++ * @priority: Amount by which to shrink the cache (0 = highes priority) ++ * @gfp_mask: (ignored) ++ */ ++static void ++mb_cache_memory_pressure(int priority, unsigned int gfp_mask) ++{ ++ LIST_HEAD(free_list); ++ struct list_head *l, *ltmp; ++ int count = 0; ++ ++ spin_lock(&mb_cache_spinlock); ++ list_for_each(l, &mb_cache_list) { ++ struct mb_cache *cache = ++ list_entry(l, struct mb_cache, c_cache_list); ++ mb_debug("cache %s (%d)", cache->c_name, ++ atomic_read(&cache->c_entry_count)); ++ count += atomic_read(&cache->c_entry_count); ++ } ++ mb_debug("trying to free %d of %d entries", ++ count / (priority ? priority : 1), count); ++ if (priority) ++ count /= priority; ++ while (count-- && !list_empty(&mb_cache_lru_list)) { ++ struct mb_cache_entry *ce = ++ list_entry(mb_cache_lru_list.next, ++ struct mb_cache_entry, e_lru_list); ++ list_del(&ce->e_lru_list); ++ __mb_cache_entry_unhash(ce); ++ list_add_tail(&ce->e_lru_list, &free_list); ++ } ++ spin_unlock(&mb_cache_spinlock); ++ list_for_each_safe(l, ltmp, &free_list) { ++ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, ++ e_lru_list), gfp_mask); ++ } ++} ++ ++ ++/* ++ * mb_cache_create() create a new cache ++ * ++ * All entries in one cache are equal size. Cache entries may be from ++ * multiple devices. If this is the first mbcache created, registers ++ * the cache with kernel memory management. Returns NULL if no more ++ * memory was available. ++ * ++ * @name: name of the cache (informal) ++ * @cache_op: contains the callback called when freeing a cache entry ++ * @entry_size: The size of a cache entry, including ++ * struct mb_cache_entry ++ * @indexes_count: number of additional indexes in the cache. Must equal ++ * MB_CACHE_INDEXES_COUNT if the number of indexes is ++ * hardwired. 
++ * @bucket_count: number of hash buckets ++ */ ++struct mb_cache * ++mb_cache_create(const char *name, struct mb_cache_op *cache_op, ++ size_t entry_size, int indexes_count, int bucket_count) ++{ ++ int m=0, n; ++ struct mb_cache *cache = NULL; ++ ++ if(entry_size < sizeof(struct mb_cache_entry) + ++ indexes_count * sizeof(struct mb_cache_entry_index)) ++ return NULL; ++ ++ MOD_INC_USE_COUNT; ++ cache = kmalloc(sizeof(struct mb_cache) + ++ indexes_count * sizeof(struct list_head), GFP_KERNEL); ++ if (!cache) ++ goto fail; ++ cache->c_name = name; ++ cache->c_op.free = NULL; ++ if (cache_op) ++ cache->c_op.free = cache_op->free; ++ atomic_set(&cache->c_entry_count, 0); ++ cache->c_bucket_count = bucket_count; ++#ifdef MB_CACHE_INDEXES_COUNT ++ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); ++#else ++ cache->c_indexes_count = indexes_count; ++#endif ++ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!cache->c_block_hash) ++ goto fail; ++ for (n=0; nc_block_hash[n]); ++ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * ++ sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!cache->c_indexes_hash[m]) ++ goto fail; ++ for (n=0; nc_indexes_hash[m][n]); ++ } ++ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, ++ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); ++ if (!cache->c_entry_cache) ++ goto fail; ++ ++ spin_lock(&mb_cache_spinlock); ++ list_add(&cache->c_cache_list, &mb_cache_list); ++ spin_unlock(&mb_cache_spinlock); ++ return cache; ++ ++fail: ++ if (cache) { ++ while (--m >= 0) ++ kfree(cache->c_indexes_hash[m]); ++ if (cache->c_block_hash) ++ kfree(cache->c_block_hash); ++ kfree(cache); ++ } ++ MOD_DEC_USE_COUNT; ++ return NULL; ++} ++ ++ ++/* ++ * mb_cache_shrink() ++ * ++ * Removes all cache entires of a device from the cache. All cache entries ++ * currently in use cannot be freed, and thus remain in the cache. 
++ * ++ * @cache: which cache to shrink ++ * @dev: which device's cache entries to shrink ++ */ ++void ++mb_cache_shrink(struct mb_cache *cache, kdev_t dev) ++{ ++ LIST_HEAD(free_list); ++ struct list_head *l, *ltmp; ++ ++ spin_lock(&mb_cache_spinlock); ++ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_lru_list); ++ if (ce->e_dev == dev) { ++ list_del(&ce->e_lru_list); ++ list_add_tail(&ce->e_lru_list, &free_list); ++ __mb_cache_entry_unhash(ce); ++ } ++ } ++ spin_unlock(&mb_cache_spinlock); ++ list_for_each_safe(l, ltmp, &free_list) { ++ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, ++ e_lru_list), GFP_KERNEL); ++ } ++} ++ ++ ++/* ++ * mb_cache_destroy() ++ * ++ * Shrinks the cache to its minimum possible size (hopefully 0 entries), ++ * and then destroys it. If this was the last mbcache, un-registers the ++ * mbcache from kernel memory management. ++ */ ++void ++mb_cache_destroy(struct mb_cache *cache) ++{ ++ LIST_HEAD(free_list); ++ struct list_head *l, *ltmp; ++ int n; ++ ++ spin_lock(&mb_cache_spinlock); ++ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_lru_list); ++ if (ce->e_cache == cache) { ++ list_del(&ce->e_lru_list); ++ list_add_tail(&ce->e_lru_list, &free_list); ++ __mb_cache_entry_unhash(ce); ++ } ++ } ++ list_del(&cache->c_cache_list); ++ spin_unlock(&mb_cache_spinlock); ++ list_for_each_safe(l, ltmp, &free_list) { ++ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, ++ e_lru_list), GFP_KERNEL); ++ } ++ ++ if (atomic_read(&cache->c_entry_count) > 0) { ++ mb_error("cache %s: %d orphaned entries", ++ cache->c_name, ++ atomic_read(&cache->c_entry_count)); ++ } ++ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) ++ /* We don't have kmem_cache_destroy() in 2.2.x */ ++ kmem_cache_shrink(cache->c_entry_cache); ++#else ++ kmem_cache_destroy(cache->c_entry_cache); ++#endif ++ for 
(n=0; n < mb_cache_indexes(cache); n++) ++ kfree(cache->c_indexes_hash[n]); ++ kfree(cache->c_block_hash); ++ kfree(cache); ++ ++ MOD_DEC_USE_COUNT; ++} ++ ++ ++/* ++ * mb_cache_entry_alloc() ++ * ++ * Allocates a new cache entry. The new entry will not be valid initially, ++ * and thus cannot be looked up yet. It should be filled with data, and ++ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL ++ * if no more memory was available. ++ */ ++struct mb_cache_entry * ++mb_cache_entry_alloc(struct mb_cache *cache) ++{ ++ struct mb_cache_entry *ce; ++ ++ atomic_inc(&cache->c_entry_count); ++ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); ++ if (ce) { ++ INIT_LIST_HEAD(&ce->e_lru_list); ++ INIT_LIST_HEAD(&ce->e_block_list); ++ ce->e_cache = cache; ++ atomic_set(&ce->e_used, 1); ++ } ++ return ce; ++} ++ ++ ++/* ++ * mb_cache_entry_insert() ++ * ++ * Inserts an entry that was allocated using mb_cache_entry_alloc() into ++ * the cache. After this, the cache entry can be looked up, but is not yet ++ * in the lru list as the caller still holds a handle to it. Returns 0 on ++ * success, or -EBUSY if a cache entry for that device + inode exists ++ * already (this may happen after a failed lookup, if another process has ++ * inserted the same cache entry in the meantime). ++ * ++ * @dev: device the cache entry belongs to ++ * @block: block number ++ * @keys: array of additional keys. There must be indexes_count entries ++ * in the array (as specified when creating the cache). 
++ */ ++int ++mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev, ++ unsigned long block, unsigned int keys[]) ++{ ++ struct mb_cache *cache = ce->e_cache; ++ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; ++ struct list_head *l; ++ int error = -EBUSY, n; ++ ++ spin_lock(&mb_cache_spinlock); ++ list_for_each(l, &cache->c_block_hash[bucket]) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_block_list); ++ if (ce->e_dev == dev && ce->e_block == block) ++ goto out; ++ } ++ __mb_cache_entry_unhash(ce); ++ ce->e_dev = dev; ++ ce->e_block = block; ++ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); ++ for (n=0; ne_indexes[n].o_key = keys[n]; ++ bucket = keys[n] % cache->c_bucket_count; ++ list_add(&ce->e_indexes[n].o_list, ++ &cache->c_indexes_hash[n][bucket]); ++ } ++out: ++ spin_unlock(&mb_cache_spinlock); ++ return error; ++} ++ ++ ++/* ++ * mb_cache_entry_release() ++ * ++ * Release a handle to a cache entry. When the last handle to a cache entry ++ * is released it is either freed (if it is invalid) or otherwise inserted ++ * in to the lru list. ++ */ ++void ++mb_cache_entry_release(struct mb_cache_entry *ce) ++{ ++ spin_lock(&mb_cache_spinlock); ++ __mb_cache_entry_release_unlock(ce); ++} ++ ++ ++/* ++ * mb_cache_entry_takeout() ++ * ++ * Take a cache entry out of the cache, making it invalid. The entry can later ++ * be re-inserted using mb_cache_entry_insert(), or released using ++ * mb_cache_entry_release(). ++ */ ++void ++mb_cache_entry_takeout(struct mb_cache_entry *ce) ++{ ++ spin_lock(&mb_cache_spinlock); ++ mb_assert(list_empty(&ce->e_lru_list)); ++ __mb_cache_entry_unhash(ce); ++ spin_unlock(&mb_cache_spinlock); ++} ++ ++ ++/* ++ * mb_cache_entry_free() ++ * ++ * This is equivalent to the sequence mb_cache_entry_takeout() -- ++ * mb_cache_entry_release(). 
++ */ ++void ++mb_cache_entry_free(struct mb_cache_entry *ce) ++{ ++ spin_lock(&mb_cache_spinlock); ++ mb_assert(list_empty(&ce->e_lru_list)); ++ __mb_cache_entry_unhash(ce); ++ __mb_cache_entry_release_unlock(ce); ++} ++ ++ ++/* ++ * mb_cache_entry_dup() ++ * ++ * Duplicate a handle to a cache entry (does not duplicate the cache entry ++ * itself). After the call, both the old and the new handle must be released. ++ */ ++struct mb_cache_entry * ++mb_cache_entry_dup(struct mb_cache_entry *ce) ++{ ++ atomic_inc(&ce->e_used); ++ return ce; ++} ++ ++ ++/* ++ * mb_cache_entry_get() ++ * ++ * Get a cache entry by device / block number. (There can only be one entry ++ * in the cache per device and block.) Returns NULL if no such cache entry ++ * exists. ++ */ ++struct mb_cache_entry * ++mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block) ++{ ++ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; ++ struct list_head *l; ++ struct mb_cache_entry *ce; ++ ++ spin_lock(&mb_cache_spinlock); ++ list_for_each(l, &cache->c_block_hash[bucket]) { ++ ce = list_entry(l, struct mb_cache_entry, e_block_list); ++ if (ce->e_dev == dev && ce->e_block == block) { ++ if (!list_empty(&ce->e_lru_list)) ++ list_del_init(&ce->e_lru_list); ++ atomic_inc(&ce->e_used); ++ goto cleanup; ++ } ++ } ++ ce = NULL; ++ ++cleanup: ++ spin_unlock(&mb_cache_spinlock); ++ return ce; ++} ++ ++#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) ++ ++static struct mb_cache_entry * ++__mb_cache_entry_find(struct list_head *l, struct list_head *head, ++ int index, kdev_t dev, unsigned int key) ++{ ++ while (l != head) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, ++ e_indexes[index].o_list); ++ if (ce->e_dev == dev && ce->e_indexes[index].o_key == key) { ++ if (!list_empty(&ce->e_lru_list)) ++ list_del_init(&ce->e_lru_list); ++ atomic_inc(&ce->e_used); ++ return ce; ++ } ++ l = l->next; ++ } ++ return NULL; ++} ++ ++ ++/* ++ * 
mb_cache_entry_find_first() ++ * ++ * Find the first cache entry on a given device with a certain key in ++ * an additional index. Additonal matches can be found with ++ * mb_cache_entry_find_next(). Returns NULL if no match was found. ++ * ++ * @cache: the cache to search ++ * @index: the number of the additonal index to search (0<=indexc_bucket_count; ++ struct list_head *l; ++ struct mb_cache_entry *ce; ++ ++ mb_assert(index < mb_cache_indexes(cache)); ++ spin_lock(&mb_cache_spinlock); ++ l = cache->c_indexes_hash[index][bucket].next; ++ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], ++ index, dev, key); ++ spin_unlock(&mb_cache_spinlock); ++ return ce; ++} ++ ++ ++/* ++ * mb_cache_entry_find_next() ++ * ++ * Find the next cache entry on a given device with a certain key in an ++ * additional index. Returns NULL if no match could be found. The previous ++ * entry is atomatically released, so that mb_cache_entry_find_next() can ++ * be called like this: ++ * ++ * entry = mb_cache_entry_find_first(); ++ * while (entry) { ++ * ... 
++ * entry = mb_cache_entry_find_next(entry, ...); ++ * } ++ * ++ * @prev: The previous match ++ * @index: the number of the additonal index to search (0<=indexe_cache; ++ unsigned int bucket = key % cache->c_bucket_count; ++ struct list_head *l; ++ struct mb_cache_entry *ce; ++ ++ mb_assert(index < mb_cache_indexes(cache)); ++ spin_lock(&mb_cache_spinlock); ++ l = prev->e_indexes[index].o_list.next; ++ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], ++ index, dev, key); ++ __mb_cache_entry_release_unlock(prev); ++ return ce; ++} ++ ++#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ ++ ++static int __init init_mbcache(void) ++{ ++ register_cache(&mb_cache_definition); ++ return 0; ++} ++ ++static void __exit exit_mbcache(void) ++{ ++ unregister_cache(&mb_cache_definition); ++} ++ ++module_init(init_mbcache) ++module_exit(exit_mbcache) ++ +--- linux-2.4.20/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 ++++ linux-2.4.20-root/include/asm-arm/unistd.h 2003-05-07 18:08:03.000000000 +0800 +@@ -244,7 +244,6 @@ + #define __NR_security (__NR_SYSCALL_BASE+223) + #define __NR_gettid (__NR_SYSCALL_BASE+224) + #define __NR_readahead (__NR_SYSCALL_BASE+225) +-#if 0 /* allocated in 2.5 */ + #define __NR_setxattr (__NR_SYSCALL_BASE+226) + #define __NR_lsetxattr (__NR_SYSCALL_BASE+227) + #define __NR_fsetxattr (__NR_SYSCALL_BASE+228) +@@ -257,7 +256,6 @@ + #define __NR_removexattr (__NR_SYSCALL_BASE+235) + #define __NR_lremovexattr (__NR_SYSCALL_BASE+236) + #define __NR_fremovexattr (__NR_SYSCALL_BASE+237) +-#endif + #define __NR_tkill (__NR_SYSCALL_BASE+238) + /* + * Please check 2.5 _before_ adding calls here, +--- linux-2.4.20/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 ++++ linux-2.4.20-root/include/asm-ppc64/unistd.h 2003-05-07 18:08:03.000000000 +0800 +@@ -218,6 +218,7 @@ + #define __NR_gettid 207 + #if 0 /* Reserved syscalls */ + #define 
__NR_tkill 208 ++#endif + #define __NR_setxattr 209 + #define __NR_lsetxattr 210 + #define __NR_fsetxattr 211 +@@ -230,6 +231,7 @@ + #define __NR_removexattr 218 + #define __NR_lremovexattr 219 + #define __NR_fremovexattr 220 ++#if 0 /* Reserved syscalls */ + #define __NR_futex 221 + #endif + +--- linux-2.4.20/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 ++++ linux-2.4.20-root/include/asm-s390/unistd.h 2003-05-07 18:08:03.000000000 +0800 +@@ -212,9 +212,18 @@ + #define __NR_getdents64 220 + #define __NR_fcntl64 221 + #define __NR_readahead 222 +-/* +- * Numbers 224-235 are reserved for posix acl +- */ ++#define __NR_setxattr 224 ++#define __NR_lsetxattr 225 ++#define __NR_fsetxattr 226 ++#define __NR_getxattr 227 ++#define __NR_lgetxattr 228 ++#define __NR_fgetxattr 229 ++#define __NR_listxattr 230 ++#define __NR_llistxattr 231 ++#define __NR_flistxattr 232 ++#define __NR_removexattr 233 ++#define __NR_lremovexattr 234 ++#define __NR_fremovexattr 235 + #define __NR_gettid 236 + #define __NR_tkill 237 + +--- linux-2.4.20/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 ++++ linux-2.4.20-root/include/asm-s390x/unistd.h 2003-05-07 18:08:03.000000000 +0800 +@@ -180,9 +180,18 @@ + #define __NR_mincore 218 + #define __NR_madvise 219 + #define __NR_readahead 222 +-/* +- * Numbers 224-235 are reserved for posix acl +- */ ++#define __NR_setxattr 224 ++#define __NR_lsetxattr 225 ++#define __NR_fsetxattr 226 ++#define __NR_getxattr 227 ++#define __NR_lgetxattr 228 ++#define __NR_fgetxattr 229 ++#define __NR_listxattr 230 ++#define __NR_llistxattr 231 ++#define __NR_flistxattr 232 ++#define __NR_removexattr 233 ++#define __NR_lremovexattr 234 ++#define __NR_fremovexattr 235 + #define __NR_gettid 236 + #define __NR_tkill 237 + +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/include/linux/cache_def.h 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,15 @@ ++/* ++ * 
linux/cache_def.h ++ * Handling of caches defined in drivers, filesystems, ... ++ * ++ * Copyright (C) 2002 by Andreas Gruenbacher, ++ */ ++ ++struct cache_definition { ++ const char *name; ++ void (*shrink)(int, unsigned int); ++ struct list_head link; ++}; ++ ++extern void register_cache(struct cache_definition *); ++extern void unregister_cache(struct cache_definition *); +--- linux-2.4.20/include/linux/errno.h~linux-2.4.20-xattr-0.8.54 2003-04-14 16:39:03.000000000 +0800 ++++ linux-2.4.20-root/include/linux/errno.h 2003-05-07 18:08:03.000000000 +0800 +@@ -23,4 +23,8 @@ + + #endif + ++/* Defined for extended attributes */ ++#define ENOATTR ENODATA /* No such attribute */ ++#define ENOTSUP EOPNOTSUPP /* Operation not supported */ ++ + #endif +--- linux-2.4.20/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54 2003-04-14 16:39:08.000000000 +0800 ++++ linux-2.4.20-root/include/linux/ext2_fs.h 2003-05-07 18:08:03.000000000 +0800 +@@ -57,8 +57,6 @@ + */ + #define EXT2_BAD_INO 1 /* Bad blocks inode */ + #define EXT2_ROOT_INO 2 /* Root inode */ +-#define EXT2_ACL_IDX_INO 3 /* ACL inode */ +-#define EXT2_ACL_DATA_INO 4 /* ACL inode */ + #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ + #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ + +@@ -86,7 +84,6 @@ + #else + # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) + #endif +-#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) + #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) + #ifdef __KERNEL__ + # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +@@ -121,28 +118,6 @@ + #endif + + /* +- * ACL structures +- */ +-struct ext2_acl_header /* Header of Access Control Lists */ +-{ +- __u32 aclh_size; +- __u32 aclh_file_count; +- __u32 aclh_acle_count; +- __u32 aclh_first_acle; +-}; +- +-struct ext2_acl_entry /* Access Control List Entry */ +-{ +- __u32 acle_size; +- __u16 acle_perms; /* Access permissions */ +- __u16 acle_type; 
/* Type of entry */ +- __u16 acle_tag; /* User or group identity */ +- __u16 acle_pad1; +- __u32 acle_next; /* Pointer on next entry for the */ +- /* same inode or on next free entry */ +-}; +- +-/* + * Structure of a blocks group descriptor + */ + struct ext2_group_desc +@@ -314,6 +289,7 @@ struct ext2_inode { + #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ + #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ + #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ ++#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ + + #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt + #define set_opt(o, opt) o |= EXT2_MOUNT_##opt +@@ -397,6 +373,7 @@ struct ext2_super_block { + + #ifdef __KERNEL__ + #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) ++#define EXT2_I(inode) (&((inode)->u.ext2_i)) + #else + /* Assume that user mode programs are passing in an ext2fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test +@@ -466,7 +443,7 @@ struct ext2_super_block { + #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 + #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff + +-#define EXT2_FEATURE_COMPAT_SUPP 0 ++#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR + #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE + #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ +@@ -623,8 +600,10 @@ extern struct address_space_operations e + + /* namei.c */ + extern struct inode_operations ext2_dir_inode_operations; ++extern struct inode_operations ext2_special_inode_operations; + + /* symlink.c */ ++extern struct inode_operations ext2_symlink_inode_operations; + extern struct inode_operations ext2_fast_symlink_inode_operations; + + #endif /* __KERNEL__ */ +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/include/linux/ext2_xattr.h 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,157 @@ ++/* ++ File: 
linux/ext2_xattr.h ++ ++ On-disk format of extended attributes for the ext2 filesystem. ++ ++ (C) 2001 Andreas Gruenbacher, ++*/ ++ ++#include ++#include ++#include ++ ++/* Magic value in attribute blocks */ ++#define EXT2_XATTR_MAGIC 0xEA020000 ++ ++/* Maximum number of references to one attribute block */ ++#define EXT2_XATTR_REFCOUNT_MAX 1024 ++ ++/* Name indexes */ ++#define EXT2_XATTR_INDEX_MAX 10 ++#define EXT2_XATTR_INDEX_USER 1 ++#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 ++#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 ++ ++struct ext2_xattr_header { ++ __u32 h_magic; /* magic number for identification */ ++ __u32 h_refcount; /* reference count */ ++ __u32 h_blocks; /* number of disk blocks used */ ++ __u32 h_hash; /* hash value of all attributes */ ++ __u32 h_reserved[4]; /* zero right now */ ++}; ++ ++struct ext2_xattr_entry { ++ __u8 e_name_len; /* length of name */ ++ __u8 e_name_index; /* attribute name index */ ++ __u16 e_value_offs; /* offset in disk block of value */ ++ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ ++ __u32 e_value_size; /* size of attribute value */ ++ __u32 e_hash; /* hash value of name and value */ ++ char e_name[0]; /* attribute name */ ++}; ++ ++#define EXT2_XATTR_PAD_BITS 2 ++#define EXT2_XATTR_PAD (1<e_name_len)) ) ++#define EXT2_XATTR_SIZE(size) \ ++ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) ++ ++#ifdef __KERNEL__ ++ ++# ifdef CONFIG_EXT2_FS_XATTR ++ ++struct ext2_xattr_handler { ++ char *prefix; ++ size_t (*list)(char *list, struct inode *inode, const char *name, ++ int name_len); ++ int (*get)(struct inode *inode, const char *name, void *buffer, ++ size_t size); ++ int (*set)(struct inode *inode, const char *name, const void *buffer, ++ size_t size, int flags); ++}; ++ ++extern int ext2_xattr_register(int, struct ext2_xattr_handler *); ++extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); ++ ++extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int); 
++extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); ++extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); ++extern int ext2_removexattr(struct dentry *, const char *); ++ ++extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); ++extern int ext2_xattr_list(struct inode *, char *, size_t); ++extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); ++ ++extern void ext2_xattr_delete_inode(struct inode *); ++extern void ext2_xattr_put_super(struct super_block *); ++ ++extern int init_ext2_xattr(void) __init; ++extern void exit_ext2_xattr(void); ++ ++# else /* CONFIG_EXT2_FS_XATTR */ ++# define ext2_setxattr NULL ++# define ext2_getxattr NULL ++# define ext2_listxattr NULL ++# define ext2_removexattr NULL ++ ++static inline int ++ext2_xattr_get(struct inode *inode, int name_index, ++ const char *name, void *buffer, size_t size) ++{ ++ return -ENOTSUP; ++} ++ ++static inline int ++ext2_xattr_list(struct inode *inode, char *buffer, size_t size) ++{ ++ return -ENOTSUP; ++} ++ ++static inline int ++ext2_xattr_set(struct inode *inode, int name_index, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ return -ENOTSUP; ++} ++ ++static inline void ++ext2_xattr_delete_inode(struct inode *inode) ++{ ++} ++ ++static inline void ++ext2_xattr_put_super(struct super_block *sb) ++{ ++} ++ ++static inline int ++init_ext2_xattr(void) ++{ ++ return 0; ++} ++ ++static inline void ++exit_ext2_xattr(void) ++{ ++} ++ ++# endif /* CONFIG_EXT2_FS_XATTR */ ++ ++# ifdef CONFIG_EXT2_FS_XATTR_USER ++ ++extern int init_ext2_xattr_user(void) __init; ++extern void exit_ext2_xattr_user(void); ++ ++# else /* CONFIG_EXT2_FS_XATTR_USER */ ++ ++static inline int ++init_ext2_xattr_user(void) ++{ ++ return 0; ++} ++ ++static inline void ++exit_ext2_xattr_user(void) ++{ ++} ++ ++# endif /* CONFIG_EXT2_FS_XATTR_USER */ ++ ++#endif /* __KERNEL__ */ ++ +--- 
linux-2.4.20/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:04.000000000 +0800 ++++ linux-2.4.20-root/include/linux/ext3_fs.h 2003-05-07 18:08:03.000000000 +0800 +@@ -63,8 +63,6 @@ + */ + #define EXT3_BAD_INO 1 /* Bad blocks inode */ + #define EXT3_ROOT_INO 2 /* Root inode */ +-#define EXT3_ACL_IDX_INO 3 /* ACL inode */ +-#define EXT3_ACL_DATA_INO 4 /* ACL inode */ + #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ + #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ + #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ +@@ -94,7 +92,6 @@ + #else + # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) + #endif +-#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) + #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) + #ifdef __KERNEL__ + # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +@@ -129,28 +126,6 @@ + #endif + + /* +- * ACL structures +- */ +-struct ext3_acl_header /* Header of Access Control Lists */ +-{ +- __u32 aclh_size; +- __u32 aclh_file_count; +- __u32 aclh_acle_count; +- __u32 aclh_first_acle; +-}; +- +-struct ext3_acl_entry /* Access Control List Entry */ +-{ +- __u32 acle_size; +- __u16 acle_perms; /* Access permissions */ +- __u16 acle_type; /* Type of entry */ +- __u16 acle_tag; /* User or group identity */ +- __u16 acle_pad1; +- __u32 acle_next; /* Pointer on next entry for the */ +- /* same inode or on next free entry */ +-}; +- +-/* + * Structure of a blocks group descriptor + */ + struct ext3_group_desc +@@ -344,6 +319,7 @@ struct ext3_inode { + #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ + #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ + #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ ++#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef 
_LINUX_EXT2_FS_H +@@ -520,7 +496,7 @@ struct ext3_super_block { + #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ + #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ + +-#define EXT3_FEATURE_COMPAT_SUPP 0 ++#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR + #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ +@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st + extern unsigned long ext3_count_free (struct buffer_head *, unsigned); + + /* inode.c */ ++extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); + extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); + extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); + +@@ -771,8 +748,10 @@ extern struct address_space_operations e + + /* namei.c */ + extern struct inode_operations ext3_dir_inode_operations; ++extern struct inode_operations ext3_special_inode_operations; + + /* symlink.c */ ++extern struct inode_operations ext3_symlink_inode_operations; + extern struct inode_operations ext3_fast_symlink_inode_operations; + + +--- linux-2.4.20/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 ++++ linux-2.4.20-root/include/linux/ext3_jbd.h 2003-05-07 18:08:03.000000000 +0800 +@@ -30,13 +30,19 @@ + + #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 + ++/* Extended attributes may touch two data buffers, two bitmap buffers, ++ * and two group and summaries. */ ++ ++#define EXT3_XATTR_TRANS_BLOCKS 8 ++ + /* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). 
The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +-#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) ++#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ ++ EXT3_XATTR_TRANS_BLOCKS - 2) + + extern int ext3_writepage_trans_blocks(struct inode *inode); + +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/include/linux/ext3_xattr.h 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,157 @@ ++/* ++ File: linux/ext3_xattr.h ++ ++ On-disk format of extended attributes for the ext3 filesystem. ++ ++ (C) 2001 Andreas Gruenbacher, ++*/ ++ ++#include ++#include ++#include ++ ++/* Magic value in attribute blocks */ ++#define EXT3_XATTR_MAGIC 0xEA020000 ++ ++/* Maximum number of references to one attribute block */ ++#define EXT3_XATTR_REFCOUNT_MAX 1024 ++ ++/* Name indexes */ ++#define EXT3_XATTR_INDEX_MAX 10 ++#define EXT3_XATTR_INDEX_USER 1 ++#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 ++#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 ++ ++struct ext3_xattr_header { ++ __u32 h_magic; /* magic number for identification */ ++ __u32 h_refcount; /* reference count */ ++ __u32 h_blocks; /* number of disk blocks used */ ++ __u32 h_hash; /* hash value of all attributes */ ++ __u32 h_reserved[4]; /* zero right now */ ++}; ++ ++struct ext3_xattr_entry { ++ __u8 e_name_len; /* length of name */ ++ __u8 e_name_index; /* attribute name index */ ++ __u16 e_value_offs; /* offset in disk block of value */ ++ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ ++ __u32 e_value_size; /* size of attribute value */ ++ __u32 e_hash; /* hash value of name and value */ ++ char e_name[0]; /* attribute name */ ++}; ++ ++#define EXT3_XATTR_PAD_BITS 2 ++#define EXT3_XATTR_PAD (1<e_name_len)) ) ++#define EXT3_XATTR_SIZE(size) \ ++ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) ++ ++#ifdef __KERNEL__ ++ ++# ifdef CONFIG_EXT3_FS_XATTR ++ ++struct 
ext3_xattr_handler { ++ char *prefix; ++ size_t (*list)(char *list, struct inode *inode, const char *name, ++ int name_len); ++ int (*get)(struct inode *inode, const char *name, void *buffer, ++ size_t size); ++ int (*set)(struct inode *inode, const char *name, const void *buffer, ++ size_t size, int flags); ++}; ++ ++extern int ext3_xattr_register(int, struct ext3_xattr_handler *); ++extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); ++ ++extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); ++extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); ++extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); ++extern int ext3_removexattr(struct dentry *, const char *); ++ ++extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); ++extern int ext3_xattr_list(struct inode *, char *, size_t); ++extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); ++ ++extern void ext3_xattr_delete_inode(handle_t *, struct inode *); ++extern void ext3_xattr_put_super(struct super_block *); ++ ++extern int init_ext3_xattr(void) __init; ++extern void exit_ext3_xattr(void); ++ ++# else /* CONFIG_EXT3_FS_XATTR */ ++# define ext3_setxattr NULL ++# define ext3_getxattr NULL ++# define ext3_listxattr NULL ++# define ext3_removexattr NULL ++ ++static inline int ++ext3_xattr_get(struct inode *inode, int name_index, const char *name, ++ void *buffer, size_t size) ++{ ++ return -ENOTSUP; ++} ++ ++static inline int ++ext3_xattr_list(struct inode *inode, void *buffer, size_t size) ++{ ++ return -ENOTSUP; ++} ++ ++static inline int ++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, ++ const char *name, const void *value, size_t size, int flags) ++{ ++ return -ENOTSUP; ++} ++ ++static inline void ++ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) ++{ ++} ++ ++static inline void ++ext3_xattr_put_super(struct 
super_block *sb) ++{ ++} ++ ++static inline int ++init_ext3_xattr(void) ++{ ++ return 0; ++} ++ ++static inline void ++exit_ext3_xattr(void) ++{ ++} ++ ++# endif /* CONFIG_EXT3_FS_XATTR */ ++ ++# ifdef CONFIG_EXT3_FS_XATTR_USER ++ ++extern int init_ext3_xattr_user(void) __init; ++extern void exit_ext3_xattr_user(void); ++ ++# else /* CONFIG_EXT3_FS_XATTR_USER */ ++ ++static inline int ++init_ext3_xattr_user(void) ++{ ++ return 0; ++} ++ ++static inline void ++exit_ext3_xattr_user(void) ++{ ++} ++ ++#endif /* CONFIG_EXT3_FS_XATTR_USER */ ++ ++#endif /* __KERNEL__ */ ++ +--- linux-2.4.20/include/linux/fs.h~linux-2.4.20-xattr-0.8.54 2003-05-05 19:00:55.000000000 +0800 ++++ linux-2.4.20-root/include/linux/fs.h 2003-05-07 18:08:03.000000000 +0800 +@@ -888,7 +888,7 @@ struct inode_operations { + int (*setattr) (struct dentry *, struct iattr *); + int (*setattr_raw) (struct inode *, struct iattr *); + int (*getattr) (struct dentry *, struct iattr *); +- int (*setxattr) (struct dentry *, const char *, void *, size_t, int); ++ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); + ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*listxattr) (struct dentry *, char *, size_t); + int (*removexattr) (struct dentry *, const char *); +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.20-root/include/linux/mbcache.h 2003-05-07 18:08:03.000000000 +0800 +@@ -0,0 +1,69 @@ ++/* ++ File: linux/mbcache.h ++ ++ (C) 2001 by Andreas Gruenbacher, ++*/ ++ ++/* Hardwire the number of additional indexes */ ++#define MB_CACHE_INDEXES_COUNT 1 ++ ++struct mb_cache_entry; ++ ++struct mb_cache_op { ++ int (*free)(struct mb_cache_entry *, int); ++}; ++ ++struct mb_cache { ++ struct list_head c_cache_list; ++ const char *c_name; ++ struct mb_cache_op c_op; ++ atomic_t c_entry_count; ++ int c_bucket_count; ++#ifndef MB_CACHE_INDEXES_COUNT ++ int c_indexes_count; ++#endif ++ kmem_cache_t *c_entry_cache; ++ struct list_head 
*c_block_hash; ++ struct list_head *c_indexes_hash[0]; ++}; ++ ++struct mb_cache_entry_index { ++ struct list_head o_list; ++ unsigned int o_key; ++}; ++ ++struct mb_cache_entry { ++ struct list_head e_lru_list; ++ struct mb_cache *e_cache; ++ atomic_t e_used; ++ kdev_t e_dev; ++ unsigned long e_block; ++ struct list_head e_block_list; ++ struct mb_cache_entry_index e_indexes[0]; ++}; ++ ++/* Functions on caches */ ++ ++struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, ++ int, int); ++void mb_cache_shrink(struct mb_cache *, kdev_t); ++void mb_cache_destroy(struct mb_cache *); ++ ++/* Functions on cache entries */ ++ ++struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); ++int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long, ++ unsigned int[]); ++void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); ++void mb_cache_entry_release(struct mb_cache_entry *); ++void mb_cache_entry_takeout(struct mb_cache_entry *); ++void mb_cache_entry_free(struct mb_cache_entry *); ++struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); ++struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t, ++ unsigned long); ++#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) ++struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, ++ kdev_t, unsigned int); ++struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, ++ kdev_t, unsigned int); ++#endif +--- linux-2.4.20/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54 2003-05-05 17:43:15.000000000 +0800 ++++ linux-2.4.20-root/kernel/ksyms.c 2003-05-07 18:08:03.000000000 +0800 +@@ -11,6 +11,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -89,6 +90,7 @@ EXPORT_SYMBOL(exit_mm); + EXPORT_SYMBOL(exit_files); + EXPORT_SYMBOL(exit_fs); + EXPORT_SYMBOL(exit_sighand); ++EXPORT_SYMBOL(copy_fs_struct); + + /* internal kernel memory management */ + EXPORT_SYMBOL(_alloc_pages); +@@ 
-107,6 +109,8 @@ EXPORT_SYMBOL(kmem_cache_validate); + EXPORT_SYMBOL(kmem_cache_alloc); + EXPORT_SYMBOL(kmem_cache_free); + EXPORT_SYMBOL(kmem_cache_size); ++EXPORT_SYMBOL(register_cache); ++EXPORT_SYMBOL(unregister_cache); + EXPORT_SYMBOL(kmalloc); + EXPORT_SYMBOL(kfree); + EXPORT_SYMBOL(vfree); +--- linux-2.4.20/mm/vmscan.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 ++++ linux-2.4.20-root/mm/vmscan.c 2003-05-07 18:08:03.000000000 +0800 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -34,6 +35,39 @@ + */ + #define DEF_PRIORITY (6) + ++static DECLARE_MUTEX(other_caches_sem); ++static LIST_HEAD(cache_definitions); ++ ++void register_cache(struct cache_definition *cache) ++{ ++ down(&other_caches_sem); ++ list_add(&cache->link, &cache_definitions); ++ up(&other_caches_sem); ++} ++ ++void unregister_cache(struct cache_definition *cache) ++{ ++ down(&other_caches_sem); ++ list_del(&cache->link); ++ up(&other_caches_sem); ++} ++ ++static void shrink_other_caches(unsigned int priority, int gfp_mask) ++{ ++ struct list_head *p; ++ ++ if (down_trylock(&other_caches_sem)) ++ return; ++ ++ list_for_each_prev(p, &cache_definitions) { ++ struct cache_definition *cache = ++ list_entry(p, struct cache_definition, link); ++ ++ cache->shrink(priority, gfp_mask); ++ } ++ up(&other_caches_sem); ++} ++ + /* + * The swap-out function returns 1 if it successfully + * scanned all the pages it was asked to (`count'). 
+@@ -577,6 +611,7 @@ static int shrink_caches(zone_t * classz + + shrink_dcache_memory(priority, gfp_mask); + shrink_icache_memory(priority, gfp_mask); ++ shrink_other_caches(priority, gfp_mask); + #ifdef CONFIG_QUOTA + shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); + #endif +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-root/fs/ext3/ext3-exports.c 2003-05-05 18:19:11.000000000 +0800 +@@ -0,0 +1,13 @@ ++#include ++#include ++#include ++#include ++#include ++ ++EXPORT_SYMBOL(ext3_force_commit); ++EXPORT_SYMBOL(ext3_bread); ++EXPORT_SYMBOL(ext3_xattr_register); ++EXPORT_SYMBOL(ext3_xattr_unregister); ++EXPORT_SYMBOL(ext3_xattr_get); ++EXPORT_SYMBOL(ext3_xattr_list); ++EXPORT_SYMBOL(ext3_xattr_set); + +_ diff --git a/lustre/kernel_patches/patches/netconsole-2.4.20-rh.patch b/lustre/kernel_patches/patches/netconsole-2.4.20-rh.patch new file mode 100644 index 0000000..e7b0479 --- /dev/null +++ b/lustre/kernel_patches/patches/netconsole-2.4.20-rh.patch @@ -0,0 +1,406 @@ +Index: linux-2.4.20-rh/drivers/net/netconsole.c +=================================================================== +--- linux-2.4.20-rh.orig/drivers/net/netconsole.c 2003-07-22 16:02:23.000000000 +0800 ++++ linux-2.4.20-rh/drivers/net/netconsole.c 2003-10-30 11:47:46.000000000 +0800 +@@ -12,6 +12,8 @@ + * + * 2001-09-17 started by Ingo Molnar. + * 2002-03-14 simultaneous syslog packet option by Michael K. 
Johnson ++ * 2003-10-30 Add sysrq command processing by Wangdi ++ * + */ + + /**************************************************************** +@@ -51,6 +53,7 @@ + #include + #include + #include ++#include "netconsole.h" + + static struct net_device *netconsole_dev; + static u16 source_port, netdump_target_port, netlog_target_port, syslog_target_port; +@@ -62,7 +65,6 @@ + static unsigned int mhz = 500, idle_timeout; + static unsigned long long mhz_cycles, jiffy_cycles; + +-#include "netconsole.h" + + #define MAX_UDP_CHUNK 1460 + #define MAX_PRINT_CHUNK (MAX_UDP_CHUNK-HEADER_LEN) +@@ -188,6 +190,46 @@ + } + } + ++static int thread_stopped = 0; ++/*Interrupt function for netdump */ ++void (*irqfunc)(int, void *, struct pt_regs *); ++static int sysrq_mode = 0; ++static int stop_sysrq_thread = 0; ++#define Set_Sysrq_mode() (sysrq_mode = 1) ++#define Clear_Sysrq_mode() (sysrq_mode = 0) ++wait_queue_head_t sysrq_thread_queue; ++wait_queue_head_t sysrq_thread_waiter_queue; ++ ++void netconsole_do_sysrq(req_t *req) ++{ ++ struct pt_regs regs; ++ struct net_device *dev = netconsole_dev; ++ ++ if (!dev) ++ return; ++ Set_Sysrq_mode(); ++ get_current_regs(&regs); ++ if (req->from != 'c'){ ++ handle_sysrq((int)req->from, &regs, NULL); ++ }else{ ++ printk(KERN_INFO"We still not implemented netdump in sysrq\n"); ++ } ++ Clear_Sysrq_mode(); ++} ++static void netdump_poll(struct net_device *dev) ++{ ++ int budget = 1; ++ ++ disable_irq(dev->irq); ++ ++ irqfunc(dev->irq, dev, 0); ++ ++ if(dev->poll && test_bit(__LINK_STATE_RX_SCHED, &dev->state)) ++ dev->poll(dev, &budget); ++ ++ enable_irq(dev->irq); ++ ++} + static struct sk_buff * alloc_netconsole_skb(struct net_device *dev, int len, int reserve) + { + int once = 1; +@@ -209,7 +251,7 @@ + once = 0; + } + Dprintk("alloc skb: polling controller ...\n"); +- dev->poll_controller(dev); ++ netdump_poll(dev); + goto repeat; + } + } +@@ -231,7 +273,7 @@ + spin_unlock(&dev->xmit_lock); + + Dprintk("xmit skb: polling controller ...\n"); +-
dev->poll_controller(dev); ++ netdump_poll(dev); + zap_completion_queue(); + goto repeat_poll; + } +@@ -426,18 +468,19 @@ + static spinlock_t sequence_lock = SPIN_LOCK_UNLOCKED; + static unsigned int log_offset; + ++ + static void write_netconsole_msg(struct console *con, const char *msg0, unsigned int msg_len) + { + int len, left, i; + struct net_device *dev; + const char *msg = msg0; + reply_t reply; +- ++ + dev = netconsole_dev; + if (!dev || netdump_mode) + return; + +- if (dev->poll_controller && netif_running(dev)) { ++ if (netif_running(dev)) { + unsigned long flags; + + __save_flags(flags); +@@ -567,8 +610,6 @@ + req_t *req; + struct net_device *dev; + +- if (!netdump_mode) +- return NET_RX_SUCCESS; + #if DEBUG + { + static int packet_count; +@@ -722,8 +763,16 @@ + Dprintk("... netdump from: %08x.\n", req->from); + Dprintk("... netdump to: %08x.\n", req->to); + +- add_new_req(req); ++ if (netdump_mode) ++ add_new_req(req); ++ else if (req->command == COMM_SYSRQ){ ++ add_new_req(req); ++ wake_up(&sysrq_thread_queue); ++ return NET_RX_DROP; ++ } + out: ++ if (!netdump_mode) ++ return NET_RX_SUCCESS; + return NET_RX_DROP; + } + +@@ -763,6 +812,7 @@ + kunmap_atomic(kaddr, KM_NETDUMP); + } + ++ + /* + * This function waits for the client to acknowledge the receipt + * of the netdump startup reply, with the possibility of packets +@@ -792,7 +842,7 @@ + // wait 1 sec. 
+ udelay(100); + Dprintk("handshake: polling controller ...\n"); +- dev->poll_controller(dev); ++ netdump_poll(dev); + zap_completion_queue(); + req = get_new_req(); + if (req) +@@ -904,7 +954,7 @@ + while (netdump_mode) { + __cli(); + Dprintk("main netdump loop: polling controller ...\n"); +- dev->poll_controller(dev); ++ netdump_poll(dev); + zap_completion_queue(); + #if !CLI + __sti(); +@@ -1009,6 +1059,32 @@ + printk("NETDUMP END!\n"); + __restore_flags(flags); + } ++static int netconsole_sysrq_schedule(void *arg) ++{ ++ struct task_struct *tsk = current; ++ ++ sprintf(tsk->comm, "sysrq_schedule"); ++ sigfillset(&tsk->blocked); ++ ++ /* main loop */ ++ thread_stopped = 0; ++ for (;;) { ++ wait_event_interruptible(sysrq_thread_queue, ++ !list_empty(&request_list) || stop_sysrq_thread); ++ while (!list_empty(&request_list)) { ++ req_t *req = get_new_req(); ++ if (req->command == COMM_SYSRQ) ++ netconsole_do_sysrq(req); ++ } ++ if (stop_sysrq_thread) ++ break; ++ wake_up(&sysrq_thread_waiter_queue); ++ } ++ thread_stopped = 1; ++ wake_up(&sysrq_thread_waiter_queue); ++ return 0; ++} ++ + + static char *dev; + static int netdump_target_eth_byte0 = 255; +@@ -1087,11 +1163,12 @@ + + static struct console netconsole = + { flags: CON_ENABLED, write: write_netconsole_msg }; +- + static int init_netconsole(void) + { + struct net_device *ndev = NULL; + struct in_device *in_dev; ++ struct irqaction *action; ++ int rc = 0; + + printk(KERN_INFO "netlog: using network device <%s>\n", dev); + // this will be valid once the device goes up. 
+@@ -1101,10 +1178,12 @@ + printk(KERN_ERR "netlog: network device %s does not exist, aborting.\n", dev); + return -1; + } ++#if 0 + if (!ndev->poll_controller) { + printk(KERN_ERR "netlog: %s's network driver does not implement netlogging yet, aborting.\n", dev); + return -1; + } ++#endif + in_dev = in_dev_get(ndev); + if (!in_dev) { + printk(KERN_ERR "netlog: network device %s is not an IP protocol device, aborting.\n", dev); +@@ -1214,12 +1293,27 @@ + + mhz_cycles = (unsigned long long)mhz * 1000000ULL; + jiffy_cycles = (unsigned long long)mhz * (1000000/HZ); +- +- INIT_LIST_HEAD(&request_list); +- ++ + ndev->rx_hook = netconsole_rx_hook; + netdump_func = netconsole_netdump; + netconsole_dev = ndev; ++ /* find irq function of the ndev*/ ++ action=find_irq_action(ndev->irq, ndev); ++ if (!action) { ++ printk(KERN_ERR "couldn't find irq handler for <%s>", dev); ++ return -1; ++ } ++ irqfunc = action->handler; ++ ++ stop_sysrq_thread = 0; ++ INIT_LIST_HEAD(&request_list); ++ init_waitqueue_head(&sysrq_thread_queue); ++ init_waitqueue_head(&sysrq_thread_waiter_queue); ++ if ((rc = kernel_thread(netconsole_sysrq_schedule, NULL, 0)) < 0 ){ ++ printk(KERN_ERR "Can not start netconsole sysrq thread: rc %d\n", rc); ++ return -1; ++ } ++ + #define STARTUP_MSG "[...network console startup...]\n" + write_netconsole_msg(NULL, STARTUP_MSG, strlen(STARTUP_MSG)); + +@@ -1230,7 +1324,11 @@ + + static void cleanup_netconsole(void) + { +- printk(KERN_INFO "netlog: network logging shut down.\n"); ++ stop_sysrq_thread = 1; ++ ++ wake_up(&sysrq_thread_queue); ++ wait_event(sysrq_thread_waiter_queue, thread_stopped); ++ printk(KERN_INFO"netlog: network logging shut down.\n"); + unregister_console(&netconsole); + + #define SHUTDOWN_MSG "[...network console shutdown...]\n" +Index: linux-2.4.20-rh/drivers/net/netconsole.h +=================================================================== +--- linux-2.4.20-rh.orig/drivers/net/netconsole.h 2003-07-22 16:02:23.000000000 +0800 ++++ 
linux-2.4.20-rh/drivers/net/netconsole.h 2003-10-30 01:48:45.000000000 +0800 +@@ -29,7 +29,7 @@ + * + ****************************************************************/ + +-#define NETCONSOLE_VERSION 0x04 ++#define NETCONSOLE_VERSION 0x03 + + enum netdump_commands { + COMM_NONE = 0, +@@ -42,6 +42,8 @@ + COMM_START_NETDUMP_ACK = 7, + COMM_GET_REGS = 8, + COMM_SHOW_STATE = 9, ++ COMM_START_WRITE_NETDUMP_ACK = 10, ++ COMM_SYSRQ = 11, + }; + + #define NETDUMP_REQ_SIZE (8+4*4) +@@ -69,6 +71,7 @@ + REPLY_REGS = 10, + REPLY_MAGIC = 11, + REPLY_SHOW_STATE = 12, ++ REPLY_SYSRQ = 13, + }; + + typedef struct netdump_reply_s { +@@ -78,4 +81,22 @@ + } reply_t; + + #define HEADER_LEN (1 + sizeof(reply_t)) +- ++/* for netconsole */ ++static inline void get_current_regs(struct pt_regs *regs) ++{ ++ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); ++ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); ++ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); ++ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); ++ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); ++ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); ++ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); ++ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); ++ __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); ++ __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); ++ __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); ++ __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); ++ __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); ++ regs->eip = (unsigned long)current_text_addr(); ++} ++ +Index: linux-2.4.20-rh/arch/i386/kernel/irq.c +=================================================================== +--- linux-2.4.20-rh.orig/arch/i386/kernel/irq.c 2003-10-30 08:29:38.000000000 +0800 ++++ linux-2.4.20-rh/arch/i386/kernel/irq.c 2003-10-30 08:30:13.000000000 +0800 +@@ -1043,7 +1043,20 @@ + register_irq_proc(irq); + return 0; + } ++struct 
irqaction *find_irq_action(unsigned int irq, void *dev_id) ++{ ++ struct irqaction *a, *r=0; + ++ spin_lock_irq(&irq_desc[irq].lock); ++ for(a=irq_desc[irq].action; a; a=a->next) { ++ if(a->dev_id == dev_id) { ++ r=a; ++ break; ++ } ++ } ++ spin_unlock_irq(&irq_desc[irq].lock); ++ return r; ++} + + static struct proc_dir_entry * root_irq_dir; + static struct proc_dir_entry * irq_dir [NR_IRQS]; +Index: linux-2.4.20-rh/net/core/dev.c +=================================================================== +--- linux-2.4.20-rh.orig/net/core/dev.c 2003-10-29 01:40:26.000000000 +0800 ++++ linux-2.4.20-rh/net/core/dev.c 2003-10-30 01:48:45.000000000 +0800 +@@ -1475,6 +1475,16 @@ + + skb_bond(skb); + ++ if (unlikely(skb->dev->rx_hook != NULL)) { ++ int ret; ++ ++ ret = skb->dev->rx_hook(skb); ++ if (ret == NET_RX_DROP){ ++ kfree_skb(skb); ++ return ret; ++ } ++ } ++ + netdev_rx_stat[smp_processor_id()].total++; + + #ifdef CONFIG_NET_FASTROUTE +Index: linux-2.4.20-rh/include/asm-i386/irq.h +=================================================================== +--- linux-2.4.20-rh.orig/include/asm-i386/irq.h 2003-10-28 16:18:18.000000000 +0800 ++++ linux-2.4.20-rh/include/asm-i386/irq.h 2003-10-30 10:24:49.000000000 +0800 +@@ -38,7 +38,7 @@ + extern void disable_irq_nosync(unsigned int); + extern void enable_irq(unsigned int); + extern void release_x86_irqs(struct task_struct *); +- ++extern struct irqaction *find_irq_action(unsigned int irq, void *dev_id); + #ifdef CONFIG_X86_LOCAL_APIC + #define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ + #endif +Index: linux-2.4.20-rh/arch/i386/kernel/i386_ksyms.c +=================================================================== +--- linux-2.4.20-rh.orig/arch/i386/kernel/i386_ksyms.c 2003-10-28 19:44:57.000000000 +0800 ++++ linux-2.4.20-rh/arch/i386/kernel/i386_ksyms.c 2003-10-30 11:14:55.000000000 +0800 +@@ -68,6 +68,7 @@ + EXPORT_SYMBOL(iounmap); + EXPORT_SYMBOL(enable_irq); + EXPORT_SYMBOL(disable_irq); 
++EXPORT_SYMBOL(find_irq_action); + EXPORT_SYMBOL(disable_irq_nosync); + EXPORT_SYMBOL(probe_irq_mask); + EXPORT_SYMBOL(kernel_thread); +@@ -199,7 +200,6 @@ + EXPORT_SYMBOL(edd); + EXPORT_SYMBOL(eddnr); + #endif +- + EXPORT_SYMBOL_GPL(show_mem); + EXPORT_SYMBOL_GPL(show_state); + EXPORT_SYMBOL_GPL(show_regs); diff --git a/lustre/kernel_patches/series/suse-2.4.21 b/lustre/kernel_patches/series/suse-2.4.21 new file mode 100644 index 0000000..deb086e --- /dev/null +++ b/lustre/kernel_patches/series/suse-2.4.21 @@ -0,0 +1,34 @@ +dev_read_only_2.4.20-rh.patch +exports_2.4.20.patch +kmem_cache_validate_2.4.20.patch +lustre_version.patch +vfs_intent-2.4.20-vanilla.patch +invalidate_show.patch +export-truncate.patch +iod-stock-24-exports.patch +ext3-htree-suse.patch +linux-2.4.21-xattr-0.8.54-suse.patch +ext3-orphan_lock-suse.patch +ext3-noread-2.4.20.patch +ext3-delete_thread-2.4.20.patch +extN-wantedi.patch +ext3-san-2.4.20.patch +ext3-map_inode_page.patch +ext3-error-export.patch +iopen-2.4.20.patch +tcp-zero-copy.patch +jbd-dont-account-blocks-twice.patch +jbd-commit-tricks.patch +ext3-no-write-super.patch +add_page_private.patch +socket-exports-vanilla.patch +removepage-2.4.20.patch +jbd-ctx_switch.patch +jbd-flushtime.patch +jbd-get_write_access.patch +nfs_export_kernel-2.4.20.patch +ext3-raw-lookup.patch +ext3-ea-in-inode-2.4.20.patch +listman-2.4.20.patch +ext3-trusted_ea-2.4.20.patch +ext3-inode-reuse-2.4.20.patch diff --git a/lustre/portals/include/portals/socknal.h b/lustre/portals/include/portals/socknal.h new file mode 100644 index 0000000..6d75e5f --- /dev/null +++ b/lustre/portals/include/portals/socknal.h @@ -0,0 +1,13 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * + * + * #defines shared between socknal implementation and utilities + */ + +#define SOCKNAL_CONN_ANY 0 +#define SOCKNAL_CONN_CONTROL 1 +#define SOCKNAL_CONN_BULK_IN 2 +#define SOCKNAL_CONN_BULK_OUT 3 +#define 
SOCKNAL_CONN_NTYPES 4 diff --git a/lustre/utils/wirehdr.c b/lustre/utils/wirehdr.c index 0f71339..4b1b65f 100644 --- a/lustre/utils/wirehdr.c +++ b/lustre/utils/wirehdr.c @@ -4,7 +4,9 @@ #include #undef LASSERT -#define LASSERT(cond) if (!(cond)) printf("failed " #cond "\n"); +#define LASSERT(cond) if (!(cond)) { printf("failed " #cond "\n"); ret = 1; } + +int ret; void lustre_assert_wire_constants(void); @@ -12,5 +14,5 @@ int main() { lustre_assert_wire_constants(); - return 0; + return ret; } diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c new file mode 100644 index 0000000..b42f4d4 --- /dev/null +++ b/lustre/utils/wiretest.c @@ -0,0 +1,672 @@ +#include +#include +#include +#include + +#undef LASSERT +#define LASSERT(cond) if (!(cond)) { printf("failed " #cond "\n"); ret = 1; } + +int ret; + +void lustre_assert_wire_constants(void); + +int main() +{ + lustre_assert_wire_constants(); + + return ret; +} +void lustre_assert_wire_constants(void) +{ + /* Wire protocol assertions generated by 'wirecheck' */ + + /* Constants... 
*/ + LASSERT(PTLRPC_MSG_MAGIC == 0x0BD00BD0); + LASSERT(PTLRPC_MSG_VERSION == 0x00000003); + LASSERT(PTL_RPC_MSG_REQUEST == 4711); + LASSERT(PTL_RPC_MSG_ERR == 4712); + LASSERT(PTL_RPC_MSG_REPLY == 4713); + LASSERT(MSG_LAST_REPLAY == 1); + LASSERT(MSG_RESENT == 2); + LASSERT(MSG_CONNECT_RECOVERING == 1); + LASSERT(MSG_CONNECT_RECONNECT == 2); + LASSERT(MSG_CONNECT_REPLAYABLE == 4); + LASSERT(OST_REPLY == 0); + LASSERT(OST_GETATTR == 1); + LASSERT(OST_SETATTR == 2); + LASSERT(OST_READ == 3); + LASSERT(OST_WRITE == 4); + LASSERT(OST_CREATE == 5); + LASSERT(OST_DESTROY == 6); + LASSERT(OST_GET_INFO == 7); + LASSERT(OST_CONNECT == 8); + LASSERT(OST_DISCONNECT == 9); + LASSERT(OST_PUNCH == 10); + LASSERT(OST_OPEN == 11); + LASSERT(OST_CLOSE == 12); + LASSERT(OST_STATFS == 13); + LASSERT(OST_SAN_READ == 14); + LASSERT(OST_SAN_WRITE == 15); + LASSERT(OST_SYNC == 16); + LASSERT(OST_LAST_OPC == 18); + LASSERT(OST_FIRST_OPC == 0); + LASSERT(OBD_FL_INLINEDATA == 1); + LASSERT(OBD_FL_OBDMDEXISTS == 2); + LASSERT(OBD_OBJECT_EOF == 0xffffffffffffffffULL); + LASSERT(OST_REQ_HAS_OA1 == 1); + LASSERT(MDS_GETATTR == 33); + LASSERT(MDS_GETATTR_NAME == 34); + LASSERT(MDS_CLOSE == 35); + LASSERT(MDS_REINT == 36); + LASSERT(MDS_READPAGE == 37); + LASSERT(MDS_CONNECT == 38); + LASSERT(MDS_DISCONNECT == 39); + LASSERT(MDS_GETSTATUS == 40); + LASSERT(MDS_STATFS == 41); + LASSERT(MDS_LAST_OPC == 46); + LASSERT(MDS_FIRST_OPC == 33); + LASSERT(REINT_SETATTR == 1); + LASSERT(REINT_CREATE == 2); + LASSERT(REINT_LINK == 3); + LASSERT(REINT_UNLINK == 4); + LASSERT(REINT_RENAME == 5); + LASSERT(REINT_OPEN == 6); + LASSERT(REINT_MAX == 6); + LASSERT(DISP_IT_EXECD == 1); + LASSERT(DISP_LOOKUP_EXECD == 2); + LASSERT(DISP_LOOKUP_NEG == 4); + LASSERT(DISP_LOOKUP_POS == 8); + LASSERT(DISP_OPEN_CREATE == 16); + LASSERT(DISP_OPEN_OPEN == 32); + LASSERT(MDS_STATUS_CONN == 1); + LASSERT(MDS_STATUS_LOV == 2); + LASSERT(MDS_OPEN_HAS_EA == 1073741824); + LASSERT(LDLM_ENQUEUE == 101); + LASSERT(LDLM_CONVERT == 
102); + LASSERT(LDLM_CANCEL == 103); + LASSERT(LDLM_BL_CALLBACK == 104); + LASSERT(LDLM_CP_CALLBACK == 105); + LASSERT(LDLM_LAST_OPC == 106); + LASSERT(LDLM_FIRST_OPC == 101); + LASSERT(PTLBD_QUERY == 200); + LASSERT(PTLBD_READ == 201); + LASSERT(PTLBD_WRITE == 202); + LASSERT(PTLBD_FLUSH == 203); + LASSERT(PTLBD_CONNECT == 204); + LASSERT(PTLBD_DISCONNECT == 205); + LASSERT(PTLBD_LAST_OPC == 206); + LASSERT(PTLBD_FIRST_OPC == 200); + LASSERT(OBD_PING == 400); + /* Sizes and Offsets */ + + + /* Checks for struct lustre_handle */ + LASSERT((int)sizeof(struct lustre_handle) == 8); + LASSERT(offsetof(struct lustre_handle, cookie) == 0); + LASSERT((int)sizeof(((struct lustre_handle *)0)->cookie) == 8); + + /* Checks for struct lustre_msg */ + LASSERT((int)sizeof(struct lustre_msg) == 64); + LASSERT(offsetof(struct lustre_msg, handle) == 0); + LASSERT((int)sizeof(((struct lustre_msg *)0)->handle) == 8); + LASSERT(offsetof(struct lustre_msg, magic) == 8); + LASSERT((int)sizeof(((struct lustre_msg *)0)->magic) == 4); + LASSERT(offsetof(struct lustre_msg, type) == 12); + LASSERT((int)sizeof(((struct lustre_msg *)0)->type) == 4); + LASSERT(offsetof(struct lustre_msg, version) == 16); + LASSERT((int)sizeof(((struct lustre_msg *)0)->version) == 4); + LASSERT(offsetof(struct lustre_msg, opc) == 20); + LASSERT((int)sizeof(((struct lustre_msg *)0)->opc) == 4); + LASSERT(offsetof(struct lustre_msg, last_xid) == 24); + LASSERT((int)sizeof(((struct lustre_msg *)0)->last_xid) == 8); + LASSERT(offsetof(struct lustre_msg, last_committed) == 32); + LASSERT((int)sizeof(((struct lustre_msg *)0)->last_committed) == 8); + LASSERT(offsetof(struct lustre_msg, transno) == 40); + LASSERT((int)sizeof(((struct lustre_msg *)0)->transno) == 8); + LASSERT(offsetof(struct lustre_msg, status) == 48); + LASSERT((int)sizeof(((struct lustre_msg *)0)->status) == 4); + LASSERT(offsetof(struct lustre_msg, flags) == 52); + LASSERT((int)sizeof(((struct lustre_msg *)0)->flags) == 4); + LASSERT(offsetof(struct 
lustre_msg, bufcount) == 60); + LASSERT((int)sizeof(((struct lustre_msg *)0)->bufcount) == 4); + LASSERT(offsetof(struct lustre_msg, buflens[7]) == 92); + LASSERT((int)sizeof(((struct lustre_msg *)0)->buflens[7]) == 4); + + /* Checks for struct obdo */ + LASSERT((int)sizeof(struct obdo) == 168); + LASSERT(offsetof(struct obdo, o_id) == 0); + LASSERT((int)sizeof(((struct obdo *)0)->o_id) == 8); + LASSERT(offsetof(struct obdo, o_gr) == 8); + LASSERT((int)sizeof(((struct obdo *)0)->o_gr) == 8); + LASSERT(offsetof(struct obdo, o_atime) == 16); + LASSERT((int)sizeof(((struct obdo *)0)->o_atime) == 8); + LASSERT(offsetof(struct obdo, o_mtime) == 24); + LASSERT((int)sizeof(((struct obdo *)0)->o_mtime) == 8); + LASSERT(offsetof(struct obdo, o_ctime) == 32); + LASSERT((int)sizeof(((struct obdo *)0)->o_ctime) == 8); + LASSERT(offsetof(struct obdo, o_size) == 40); + LASSERT((int)sizeof(((struct obdo *)0)->o_size) == 8); + LASSERT(offsetof(struct obdo, o_blocks) == 48); + LASSERT((int)sizeof(((struct obdo *)0)->o_blocks) == 8); + LASSERT(offsetof(struct obdo, o_rdev) == 56); + LASSERT((int)sizeof(((struct obdo *)0)->o_rdev) == 8); + LASSERT(offsetof(struct obdo, o_blksize) == 64); + LASSERT((int)sizeof(((struct obdo *)0)->o_blksize) == 4); + LASSERT(offsetof(struct obdo, o_mode) == 68); + LASSERT((int)sizeof(((struct obdo *)0)->o_mode) == 4); + LASSERT(offsetof(struct obdo, o_uid) == 72); + LASSERT((int)sizeof(((struct obdo *)0)->o_uid) == 4); + LASSERT(offsetof(struct obdo, o_gid) == 76); + LASSERT((int)sizeof(((struct obdo *)0)->o_gid) == 4); + LASSERT(offsetof(struct obdo, o_flags) == 80); + LASSERT((int)sizeof(((struct obdo *)0)->o_flags) == 4); + LASSERT(offsetof(struct obdo, o_nlink) == 84); + LASSERT((int)sizeof(((struct obdo *)0)->o_nlink) == 4); + LASSERT(offsetof(struct obdo, o_generation) == 88); + LASSERT((int)sizeof(((struct obdo *)0)->o_generation) == 4); + LASSERT(offsetof(struct obdo, o_valid) == 92); + LASSERT((int)sizeof(((struct obdo *)0)->o_valid) == 4); + 
LASSERT(offsetof(struct obdo, o_obdflags) == 96); + LASSERT((int)sizeof(((struct obdo *)0)->o_obdflags) == 4); + LASSERT(offsetof(struct obdo, o_easize) == 100); + LASSERT((int)sizeof(((struct obdo *)0)->o_easize) == 4); + LASSERT(offsetof(struct obdo, o_inline) == 104); + LASSERT((int)sizeof(((struct obdo *)0)->o_inline) == 64); + LASSERT(OBD_MD_FLID == 1); + LASSERT(OBD_MD_FLATIME == 2); + LASSERT(OBD_MD_FLMTIME == 4); + LASSERT(OBD_MD_FLCTIME == 8); + LASSERT(OBD_MD_FLSIZE == 16); + LASSERT(OBD_MD_FLBLOCKS == 32); + LASSERT(OBD_MD_FLBLKSZ == 64); + LASSERT(OBD_MD_FLMODE == 128); + LASSERT(OBD_MD_FLTYPE == 256); + LASSERT(OBD_MD_FLUID == 512); + LASSERT(OBD_MD_FLGID == 1024); + LASSERT(OBD_MD_FLFLAGS == 2048); + LASSERT(OBD_MD_FLOBDFLG == 4096); + LASSERT(OBD_MD_FLNLINK == 8192); + LASSERT(OBD_MD_FLGENER == 16384); + LASSERT(OBD_MD_FLINLINE == 32768); + LASSERT(OBD_MD_FLRDEV == 65536); + LASSERT(OBD_MD_FLEASIZE == 131072); + LASSERT(OBD_MD_LINKNAME == 262144); + LASSERT(OBD_MD_FLHANDLE == 524288); + LASSERT(OBD_MD_FLCKSUM == 1048576); + LASSERT(OBD_MD_FLQOS == 2097152); + LASSERT(OBD_MD_FLOSCOPQ == 4194304); + LASSERT(OBD_MD_FLCOOKIE == 8388608); + LASSERT(OBD_MD_FLGROUP == 16777216); + + /* Checks for struct lov_mds_md_v1 */ + LASSERT((int)sizeof(struct lov_mds_md_v1) == 32); + LASSERT(offsetof(struct lov_mds_md_v1, lmm_magic) == 0); + LASSERT((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic) == 4); + LASSERT(offsetof(struct lov_mds_md_v1, lmm_pattern) == 4); + LASSERT((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern) == 4); + LASSERT(offsetof(struct lov_mds_md_v1, lmm_object_id) == 8); + LASSERT((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_object_id) == 8); + LASSERT(offsetof(struct lov_mds_md_v1, lmm_object_gr) == 16); + LASSERT((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_object_gr) == 8); + LASSERT(offsetof(struct lov_mds_md_v1, lmm_stripe_size) == 24); + LASSERT((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size) == 4); + LASSERT(offsetof(struct 
lov_mds_md_v1, lmm_stripe_count) == 28); + LASSERT((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count) == 4); + LASSERT(offsetof(struct lov_mds_md_v1, lmm_objects) == 32); + LASSERT((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects) == 0); + + /* Checks for struct lov_ost_data_v1 */ + LASSERT((int)sizeof(struct lov_ost_data_v1) == 24); + LASSERT(offsetof(struct lov_ost_data_v1, l_object_id) == 0); + LASSERT((int)sizeof(((struct lov_ost_data_v1 *)0)->l_object_id) == 8); + LASSERT(offsetof(struct lov_ost_data_v1, l_object_gr) == 8); + LASSERT((int)sizeof(((struct lov_ost_data_v1 *)0)->l_object_gr) == 8); + LASSERT(offsetof(struct lov_ost_data_v1, l_ost_gen) == 16); + LASSERT((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen) == 4); + LASSERT(offsetof(struct lov_ost_data_v1, l_ost_idx) == 20); + LASSERT((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4); + LASSERT(LOV_MAGIC_V0 == 198183888); + LASSERT(LOV_MAGIC_V1 == 198249424); + LASSERT(LOV_PATTERN_RAID0 == 1); + LASSERT(LOV_PATTERN_RAID1 == 2); + + /* Checks for struct obd_statfs */ + LASSERT((int)sizeof(struct obd_statfs) == 144); + LASSERT(offsetof(struct obd_statfs, os_type) == 0); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_type) == 8); + LASSERT(offsetof(struct obd_statfs, os_blocks) == 8); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_blocks) == 8); + LASSERT(offsetof(struct obd_statfs, os_bfree) == 16); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_bfree) == 8); + LASSERT(offsetof(struct obd_statfs, os_bavail) == 24); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_bavail) == 8); + LASSERT(offsetof(struct obd_statfs, os_ffree) == 40); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_ffree) == 8); + LASSERT(offsetof(struct obd_statfs, os_fsid) == 48); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_fsid) == 40); + LASSERT(offsetof(struct obd_statfs, os_bsize) == 88); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_bsize) == 4); + LASSERT(offsetof(struct 
obd_statfs, os_namelen) == 92); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_namelen) == 4); + LASSERT(offsetof(struct obd_statfs, os_spare) == 104); + LASSERT((int)sizeof(((struct obd_statfs *)0)->os_spare) == 40); + + /* Checks for struct obd_ioobj */ + LASSERT((int)sizeof(struct obd_ioobj) == 24); + LASSERT(offsetof(struct obd_ioobj, ioo_id) == 0); + LASSERT((int)sizeof(((struct obd_ioobj *)0)->ioo_id) == 8); + LASSERT(offsetof(struct obd_ioobj, ioo_gr) == 8); + LASSERT((int)sizeof(((struct obd_ioobj *)0)->ioo_gr) == 8); + LASSERT(offsetof(struct obd_ioobj, ioo_type) == 16); + LASSERT((int)sizeof(((struct obd_ioobj *)0)->ioo_type) == 4); + LASSERT(offsetof(struct obd_ioobj, ioo_bufcnt) == 20); + LASSERT((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4); + + /* Checks for struct niobuf_remote */ + LASSERT((int)sizeof(struct niobuf_remote) == 16); + LASSERT(offsetof(struct niobuf_remote, offset) == 0); + LASSERT((int)sizeof(((struct niobuf_remote *)0)->offset) == 8); + LASSERT(offsetof(struct niobuf_remote, len) == 8); + LASSERT((int)sizeof(((struct niobuf_remote *)0)->len) == 4); + LASSERT(offsetof(struct niobuf_remote, flags) == 12); + LASSERT((int)sizeof(((struct niobuf_remote *)0)->flags) == 4); + LASSERT(OBD_BRW_READ == 1); + LASSERT(OBD_BRW_WRITE == 2); + LASSERT(OBD_BRW_CREATE == 4); + LASSERT(OBD_BRW_SYNC == 8); + + /* Checks for struct ost_body */ + LASSERT((int)sizeof(struct ost_body) == 168); + LASSERT(offsetof(struct ost_body, oa) == 0); + LASSERT((int)sizeof(((struct ost_body *)0)->oa) == 168); + + /* Checks for struct ll_fid */ + LASSERT((int)sizeof(struct ll_fid) == 16); + LASSERT(offsetof(struct ll_fid, id) == 0); + LASSERT((int)sizeof(((struct ll_fid *)0)->id) == 8); + LASSERT(offsetof(struct ll_fid, generation) == 8); + LASSERT((int)sizeof(((struct ll_fid *)0)->generation) == 4); + LASSERT(offsetof(struct ll_fid, f_type) == 12); + LASSERT((int)sizeof(((struct ll_fid *)0)->f_type) == 4); + + /* Checks for struct mds_status_req */ + 
LASSERT((int)sizeof(struct mds_status_req) == 8); + LASSERT(offsetof(struct mds_status_req, flags) == 0); + LASSERT((int)sizeof(((struct mds_status_req *)0)->flags) == 4); + LASSERT(offsetof(struct mds_status_req, repbuf) == 4); + LASSERT((int)sizeof(((struct mds_status_req *)0)->repbuf) == 4); + + /* Checks for struct mds_body */ + LASSERT((int)sizeof(struct mds_body) == 128); + LASSERT(offsetof(struct mds_body, fid1) == 0); + LASSERT((int)sizeof(((struct mds_body *)0)->fid1) == 16); + LASSERT(offsetof(struct mds_body, fid2) == 16); + LASSERT((int)sizeof(((struct mds_body *)0)->fid2) == 16); + LASSERT(offsetof(struct mds_body, handle) == 32); + LASSERT((int)sizeof(((struct mds_body *)0)->handle) == 8); + LASSERT(offsetof(struct mds_body, size) == 40); + LASSERT((int)sizeof(((struct mds_body *)0)->size) == 8); + LASSERT(offsetof(struct mds_body, blocks) == 48); + LASSERT((int)sizeof(((struct mds_body *)0)->blocks) == 8); + LASSERT(offsetof(struct mds_body, ino) == 56); + LASSERT((int)sizeof(((struct mds_body *)0)->ino) == 4); + LASSERT(offsetof(struct mds_body, valid) == 60); + LASSERT((int)sizeof(((struct mds_body *)0)->valid) == 4); + LASSERT(offsetof(struct mds_body, fsuid) == 64); + LASSERT((int)sizeof(((struct mds_body *)0)->fsuid) == 4); + LASSERT(offsetof(struct mds_body, fsgid) == 68); + LASSERT((int)sizeof(((struct mds_body *)0)->fsgid) == 4); + LASSERT(offsetof(struct mds_body, capability) == 72); + LASSERT((int)sizeof(((struct mds_body *)0)->capability) == 4); + LASSERT(offsetof(struct mds_body, mode) == 76); + LASSERT((int)sizeof(((struct mds_body *)0)->mode) == 4); + LASSERT(offsetof(struct mds_body, uid) == 80); + LASSERT((int)sizeof(((struct mds_body *)0)->uid) == 4); + LASSERT(offsetof(struct mds_body, gid) == 84); + LASSERT((int)sizeof(((struct mds_body *)0)->gid) == 4); + LASSERT(offsetof(struct mds_body, mtime) == 88); + LASSERT((int)sizeof(((struct mds_body *)0)->mtime) == 4); + LASSERT(offsetof(struct mds_body, ctime) == 92); + 
LASSERT((int)sizeof(((struct mds_body *)0)->ctime) == 4); + LASSERT(offsetof(struct mds_body, atime) == 96); + LASSERT((int)sizeof(((struct mds_body *)0)->atime) == 4); + LASSERT(offsetof(struct mds_body, flags) == 100); + LASSERT((int)sizeof(((struct mds_body *)0)->flags) == 4); + LASSERT(offsetof(struct mds_body, rdev) == 104); + LASSERT((int)sizeof(((struct mds_body *)0)->rdev) == 4); + LASSERT(offsetof(struct mds_body, nlink) == 108); + LASSERT((int)sizeof(((struct mds_body *)0)->nlink) == 4); + LASSERT(offsetof(struct mds_body, generation) == 112); + LASSERT((int)sizeof(((struct mds_body *)0)->generation) == 4); + LASSERT(offsetof(struct mds_body, suppgid) == 116); + LASSERT((int)sizeof(((struct mds_body *)0)->suppgid) == 4); + LASSERT(offsetof(struct mds_body, eadatasize) == 120); + LASSERT((int)sizeof(((struct mds_body *)0)->eadatasize) == 4); + + /* Checks for struct mds_rec_setattr */ + LASSERT((int)sizeof(struct mds_rec_setattr) == 88); + LASSERT(offsetof(struct mds_rec_setattr, sa_opcode) == 0); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_opcode) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_fsuid) == 4); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_fsuid) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_fsgid) == 8); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_fsgid) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_cap) == 12); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_cap) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_suppgid) == 16); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_suppgid) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_valid) == 20); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_valid) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_fid) == 24); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_fid) == 16); + LASSERT(offsetof(struct mds_rec_setattr, sa_mode) == 40); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_mode) == 
4); + LASSERT(offsetof(struct mds_rec_setattr, sa_uid) == 44); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_uid) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_gid) == 48); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_gid) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_attr_flags) == 52); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_attr_flags) == 4); + LASSERT(offsetof(struct mds_rec_setattr, sa_size) == 56); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_size) == 8); + LASSERT(offsetof(struct mds_rec_setattr, sa_atime) == 64); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_atime) == 8); + LASSERT(offsetof(struct mds_rec_setattr, sa_mtime) == 72); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_mtime) == 8); + LASSERT(offsetof(struct mds_rec_setattr, sa_ctime) == 80); + LASSERT((int)sizeof(((struct mds_rec_setattr *)0)->sa_ctime) == 8); + + /* Checks for struct mds_rec_create */ + LASSERT((int)sizeof(struct mds_rec_create) == 80); + LASSERT(offsetof(struct mds_rec_create, cr_opcode) == 0); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_opcode) == 4); + LASSERT(offsetof(struct mds_rec_create, cr_fsuid) == 4); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_fsuid) == 4); + LASSERT(offsetof(struct mds_rec_create, cr_fsgid) == 8); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_fsgid) == 4); + LASSERT(offsetof(struct mds_rec_create, cr_cap) == 12); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_cap) == 4); + LASSERT(offsetof(struct mds_rec_create, cr_flags) == 16); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_flags) == 4); + LASSERT(offsetof(struct mds_rec_create, cr_mode) == 20); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_mode) == 4); + LASSERT(offsetof(struct mds_rec_create, cr_fid) == 24); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_fid) == 16); + LASSERT(offsetof(struct mds_rec_create, cr_replayfid) == 40); + 
LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_replayfid) == 16); + LASSERT(offsetof(struct mds_rec_create, cr_time) == 56); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_time) == 8); + LASSERT(offsetof(struct mds_rec_create, cr_rdev) == 64); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_rdev) == 8); + LASSERT(offsetof(struct mds_rec_create, cr_suppgid) == 72); + LASSERT((int)sizeof(((struct mds_rec_create *)0)->cr_suppgid) == 4); + + /* Checks for struct mds_rec_link */ + LASSERT((int)sizeof(struct mds_rec_link) == 64); + LASSERT(offsetof(struct mds_rec_link, lk_opcode) == 0); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_opcode) == 4); + LASSERT(offsetof(struct mds_rec_link, lk_fsuid) == 4); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_fsuid) == 4); + LASSERT(offsetof(struct mds_rec_link, lk_fsgid) == 8); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_fsgid) == 4); + LASSERT(offsetof(struct mds_rec_link, lk_cap) == 12); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_cap) == 4); + LASSERT(offsetof(struct mds_rec_link, lk_suppgid1) == 16); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_suppgid1) == 4); + LASSERT(offsetof(struct mds_rec_link, lk_suppgid2) == 20); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_suppgid2) == 4); + LASSERT(offsetof(struct mds_rec_link, lk_fid1) == 24); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_fid1) == 16); + LASSERT(offsetof(struct mds_rec_link, lk_fid2) == 40); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_fid2) == 16); + LASSERT(offsetof(struct mds_rec_link, lk_time) == 56); + LASSERT((int)sizeof(((struct mds_rec_link *)0)->lk_time) == 8); + + /* Checks for struct mds_rec_unlink */ + LASSERT((int)sizeof(struct mds_rec_unlink) == 64); + LASSERT(offsetof(struct mds_rec_unlink, ul_opcode) == 0); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_opcode) == 4); + LASSERT(offsetof(struct mds_rec_unlink, ul_fsuid) == 4); + LASSERT((int)sizeof(((struct 
mds_rec_unlink *)0)->ul_fsuid) == 4); + LASSERT(offsetof(struct mds_rec_unlink, ul_fsgid) == 8); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_fsgid) == 4); + LASSERT(offsetof(struct mds_rec_unlink, ul_cap) == 12); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_cap) == 4); + LASSERT(offsetof(struct mds_rec_unlink, ul_suppgid) == 16); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_suppgid) == 4); + LASSERT(offsetof(struct mds_rec_unlink, ul_mode) == 20); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_mode) == 4); + LASSERT(offsetof(struct mds_rec_unlink, ul_fid1) == 24); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_fid1) == 16); + LASSERT(offsetof(struct mds_rec_unlink, ul_fid2) == 40); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_fid2) == 16); + LASSERT(offsetof(struct mds_rec_unlink, ul_time) == 56); + LASSERT((int)sizeof(((struct mds_rec_unlink *)0)->ul_time) == 8); + + /* Checks for struct mds_rec_rename */ + LASSERT((int)sizeof(struct mds_rec_rename) == 64); + LASSERT(offsetof(struct mds_rec_rename, rn_opcode) == 0); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_opcode) == 4); + LASSERT(offsetof(struct mds_rec_rename, rn_fsuid) == 4); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_fsuid) == 4); + LASSERT(offsetof(struct mds_rec_rename, rn_fsgid) == 8); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_fsgid) == 4); + LASSERT(offsetof(struct mds_rec_rename, rn_cap) == 12); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_cap) == 4); + LASSERT(offsetof(struct mds_rec_rename, rn_suppgid1) == 16); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_suppgid1) == 4); + LASSERT(offsetof(struct mds_rec_rename, rn_suppgid2) == 20); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_suppgid2) == 4); + LASSERT(offsetof(struct mds_rec_rename, rn_fid1) == 24); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_fid1) == 16); + LASSERT(offsetof(struct mds_rec_rename, rn_fid2) == 40); + 
LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_fid2) == 16); + LASSERT(offsetof(struct mds_rec_rename, rn_time) == 56); + LASSERT((int)sizeof(((struct mds_rec_rename *)0)->rn_time) == 8); + + /* Checks for struct lov_desc */ + LASSERT((int)sizeof(struct lov_desc) == 72); + LASSERT(offsetof(struct lov_desc, ld_tgt_count) == 0); + LASSERT((int)sizeof(((struct lov_desc *)0)->ld_tgt_count) == 4); + LASSERT(offsetof(struct lov_desc, ld_active_tgt_count) == 4); + LASSERT((int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count) == 4); + LASSERT(offsetof(struct lov_desc, ld_default_stripe_count) == 8); + LASSERT((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count) == 4); + LASSERT(offsetof(struct lov_desc, ld_pattern) == 12); + LASSERT((int)sizeof(((struct lov_desc *)0)->ld_pattern) == 4); + LASSERT(offsetof(struct lov_desc, ld_default_stripe_size) == 16); + LASSERT((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size) == 8); + LASSERT(offsetof(struct lov_desc, ld_default_stripe_offset) == 24); + LASSERT((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8); + LASSERT(offsetof(struct lov_desc, ld_uuid) == 32); + LASSERT((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40); + + /* Checks for struct ldlm_res_id */ + LASSERT((int)sizeof(struct ldlm_res_id) == 32); + LASSERT(offsetof(struct ldlm_res_id, name[4]) == 32); + LASSERT((int)sizeof(((struct ldlm_res_id *)0)->name[4]) == 8); + + /* Checks for struct ldlm_extent */ + LASSERT((int)sizeof(struct ldlm_extent) == 16); + LASSERT(offsetof(struct ldlm_extent, start) == 0); + LASSERT((int)sizeof(((struct ldlm_extent *)0)->start) == 8); + LASSERT(offsetof(struct ldlm_extent, end) == 8); + LASSERT((int)sizeof(((struct ldlm_extent *)0)->end) == 8); + + /* Checks for struct ldlm_flock */ + LASSERT((int)sizeof(struct ldlm_flock) == 32); + LASSERT(offsetof(struct ldlm_flock, start) == 0); + LASSERT((int)sizeof(((struct ldlm_flock *)0)->start) == 8); + LASSERT(offsetof(struct ldlm_flock, end) == 8); + 
LASSERT((int)sizeof(((struct ldlm_flock *)0)->end) == 8); + LASSERT(offsetof(struct ldlm_flock, blocking_export) == 16); + LASSERT((int)sizeof(((struct ldlm_flock *)0)->blocking_export) == 8); + LASSERT(offsetof(struct ldlm_flock, blocking_pid) == 24); + LASSERT((int)sizeof(((struct ldlm_flock *)0)->blocking_pid) == 4); + LASSERT(offsetof(struct ldlm_flock, pid) == 28); + LASSERT((int)sizeof(((struct ldlm_flock *)0)->pid) == 4); + + /* Checks for struct ldlm_intent */ + LASSERT((int)sizeof(struct ldlm_intent) == 8); + LASSERT(offsetof(struct ldlm_intent, opc) == 0); + LASSERT((int)sizeof(((struct ldlm_intent *)0)->opc) == 8); + + /* Checks for struct ldlm_resource_desc */ + LASSERT((int)sizeof(struct ldlm_resource_desc) == 52); + LASSERT(offsetof(struct ldlm_resource_desc, lr_type) == 0); + LASSERT((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4); + LASSERT(offsetof(struct ldlm_resource_desc, lr_name) == 4); + LASSERT((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32); + LASSERT(offsetof(struct ldlm_resource_desc, lr_version[4]) == 52); + LASSERT((int)sizeof(((struct ldlm_resource_desc *)0)->lr_version[4]) == 4); + + /* Checks for struct ldlm_lock_desc */ + LASSERT((int)sizeof(struct ldlm_lock_desc) == 108); + LASSERT(offsetof(struct ldlm_lock_desc, l_resource) == 0); + LASSERT((int)sizeof(((struct ldlm_lock_desc *)0)->l_resource) == 52); + LASSERT(offsetof(struct ldlm_lock_desc, l_req_mode) == 52); + LASSERT((int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode) == 4); + LASSERT(offsetof(struct ldlm_lock_desc, l_granted_mode) == 56); + LASSERT((int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode) == 4); + LASSERT(offsetof(struct ldlm_lock_desc, l_policy_data) == 60); + LASSERT((int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data) == 32); + LASSERT(offsetof(struct ldlm_lock_desc, l_version[4]) == 108); + LASSERT((int)sizeof(((struct ldlm_lock_desc *)0)->l_version[4]) == 4); + + /* Checks for struct ldlm_request */ + 
LASSERT((int)sizeof(struct ldlm_request) == 128); + LASSERT(offsetof(struct ldlm_request, lock_flags) == 0); + LASSERT((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4); + LASSERT(offsetof(struct ldlm_request, lock_desc) == 4); + LASSERT((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 108); + LASSERT(offsetof(struct ldlm_request, lock_handle1) == 112); + LASSERT((int)sizeof(((struct ldlm_request *)0)->lock_handle1) == 8); + LASSERT(offsetof(struct ldlm_request, lock_handle2) == 120); + LASSERT((int)sizeof(((struct ldlm_request *)0)->lock_handle2) == 8); + + /* Checks for struct ldlm_reply */ + LASSERT((int)sizeof(struct ldlm_reply) == 96); + LASSERT(offsetof(struct ldlm_reply, lock_flags) == 0); + LASSERT((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4); + LASSERT(offsetof(struct ldlm_reply, lock_mode) == 4); + LASSERT((int)sizeof(((struct ldlm_reply *)0)->lock_mode) == 4); + LASSERT(offsetof(struct ldlm_reply, lock_resource_name) == 8); + LASSERT((int)sizeof(((struct ldlm_reply *)0)->lock_resource_name) == 32); + LASSERT(offsetof(struct ldlm_reply, lock_handle) == 40); + LASSERT((int)sizeof(((struct ldlm_reply *)0)->lock_handle) == 8); + LASSERT(offsetof(struct ldlm_reply, lock_policy_data) == 48); + LASSERT((int)sizeof(((struct ldlm_reply *)0)->lock_policy_data) == 32); + LASSERT(offsetof(struct ldlm_reply, lock_policy_res1) == 80); + LASSERT((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1) == 8); + LASSERT(offsetof(struct ldlm_reply, lock_policy_res2) == 88); + LASSERT((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2) == 8); + + /* Checks for struct ptlbd_op */ + LASSERT((int)sizeof(struct ptlbd_op) == 12); + LASSERT(offsetof(struct ptlbd_op, op_cmd) == 0); + LASSERT((int)sizeof(((struct ptlbd_op *)0)->op_cmd) == 2); + LASSERT(offsetof(struct ptlbd_op, op_lun) == 2); + LASSERT((int)sizeof(((struct ptlbd_op *)0)->op_lun) == 2); + LASSERT(offsetof(struct ptlbd_op, op_niob_cnt) == 4); + LASSERT((int)sizeof(((struct ptlbd_op 
*)0)->op_niob_cnt) == 2); + LASSERT(offsetof(struct ptlbd_op, op__padding) == 6); + LASSERT((int)sizeof(((struct ptlbd_op *)0)->op__padding) == 2); + LASSERT(offsetof(struct ptlbd_op, op_block_cnt) == 8); + LASSERT((int)sizeof(((struct ptlbd_op *)0)->op_block_cnt) == 4); + + /* Checks for struct ptlbd_niob */ + LASSERT((int)sizeof(struct ptlbd_niob) == 24); + LASSERT(offsetof(struct ptlbd_niob, n_xid) == 0); + LASSERT((int)sizeof(((struct ptlbd_niob *)0)->n_xid) == 8); + LASSERT(offsetof(struct ptlbd_niob, n_block_nr) == 8); + LASSERT((int)sizeof(((struct ptlbd_niob *)0)->n_block_nr) == 8); + LASSERT(offsetof(struct ptlbd_niob, n_offset) == 16); + LASSERT((int)sizeof(((struct ptlbd_niob *)0)->n_offset) == 4); + LASSERT(offsetof(struct ptlbd_niob, n_length) == 20); + LASSERT((int)sizeof(((struct ptlbd_niob *)0)->n_length) == 4); + + /* Checks for struct ptlbd_rsp */ + LASSERT((int)sizeof(struct ptlbd_rsp) == 4); + LASSERT(offsetof(struct ptlbd_rsp, r_status) == 0); + LASSERT((int)sizeof(((struct ptlbd_rsp *)0)->r_status) == 2); + LASSERT(offsetof(struct ptlbd_rsp, r_error_cnt) == 2); + LASSERT((int)sizeof(((struct ptlbd_rsp *)0)->r_error_cnt) == 2); + + /* Checks for struct llog_logid */ + LASSERT((int)sizeof(struct llog_logid) == 20); + LASSERT(offsetof(struct llog_logid, lgl_oid) == 0); + LASSERT((int)sizeof(((struct llog_logid *)0)->lgl_oid) == 8); + LASSERT(offsetof(struct llog_logid, lgl_ogr) == 8); + LASSERT((int)sizeof(((struct llog_logid *)0)->lgl_ogr) == 8); + LASSERT(offsetof(struct llog_logid, lgl_ogen) == 16); + LASSERT((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4); + + /* Checks for struct llog_rec_hdr */ + LASSERT((int)sizeof(struct llog_rec_hdr) == 16); + LASSERT(offsetof(struct llog_rec_hdr, lrh_len) == 0); + LASSERT((int)sizeof(((struct llog_rec_hdr *)0)->lrh_len) == 4); + LASSERT(offsetof(struct llog_rec_hdr, lrh_index) == 4); + LASSERT((int)sizeof(((struct llog_rec_hdr *)0)->lrh_index) == 4); + LASSERT(offsetof(struct llog_rec_hdr, 
lrh_type) == 8); + LASSERT((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4); + + /* Checks for struct llog_rec_tail */ + LASSERT((int)sizeof(struct llog_rec_tail) == 8); + LASSERT(offsetof(struct llog_rec_tail, lrt_len) == 0); + LASSERT((int)sizeof(((struct llog_rec_tail *)0)->lrt_len) == 4); + LASSERT(offsetof(struct llog_rec_tail, lrt_index) == 4); + LASSERT((int)sizeof(((struct llog_rec_tail *)0)->lrt_index) == 4); + + /* Checks for struct llog_log_hdr */ + LASSERT((int)sizeof(struct llog_log_hdr) == 4096); + LASSERT(offsetof(struct llog_log_hdr, llh_hdr) == 0); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_hdr) == 16); + LASSERT(offsetof(struct llog_log_hdr, llh_timestamp) == 16); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp) == 8); + LASSERT(offsetof(struct llog_log_hdr, llh_count) == 24); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_count) == 4); + LASSERT(offsetof(struct llog_log_hdr, llh_bitmap_offset) == 28); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset) == 4); + LASSERT(offsetof(struct llog_log_hdr, llh_size) == 32); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_size) == 4); + LASSERT(offsetof(struct llog_log_hdr, llh_flags) == 36); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_flags) == 4); + LASSERT(offsetof(struct llog_log_hdr, llh_cat_idx) == 40); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx) == 4); + LASSERT(offsetof(struct llog_log_hdr, llh_tgtuuid) == 44); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid) == 40); + LASSERT(offsetof(struct llog_log_hdr, llh_reserved) == 84); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_reserved) == 4); + LASSERT(offsetof(struct llog_log_hdr, llh_bitmap) == 88); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap) == 4000); + LASSERT(offsetof(struct llog_log_hdr, llh_tail) == 4088); + LASSERT((int)sizeof(((struct llog_log_hdr *)0)->llh_tail) == 8); + + /* Checks for struct llog_cookie */ + 
LASSERT((int)sizeof(struct llog_cookie) == 32); + LASSERT(offsetof(struct llog_cookie, lgc_lgl) == 0); + LASSERT((int)sizeof(((struct llog_cookie *)0)->lgc_lgl) == 20); + LASSERT(offsetof(struct llog_cookie, lgc_subsys) == 20); + LASSERT((int)sizeof(((struct llog_cookie *)0)->lgc_subsys) == 4); + LASSERT(offsetof(struct llog_cookie, lgc_index) == 24); + LASSERT((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4); + + /* Checks for struct llogd_body */ + LASSERT((int)sizeof(struct llogd_body) == 40); + LASSERT(offsetof(struct llogd_body, lgd_logid) == 0); + LASSERT((int)sizeof(((struct llogd_body *)0)->lgd_logid) == 20); + LASSERT(offsetof(struct llogd_body, lgd_len) == 20); + LASSERT((int)sizeof(((struct llogd_body *)0)->lgd_len) == 4); + LASSERT(offsetof(struct llogd_body, lgd_index) == 24); + LASSERT((int)sizeof(((struct llogd_body *)0)->lgd_index) == 4); + LASSERT(offsetof(struct llogd_body, lgd_saved_index) == 28); + LASSERT((int)sizeof(((struct llogd_body *)0)->lgd_saved_index) == 4); + LASSERT(offsetof(struct llogd_body, lgd_cur_offset) == 32); + LASSERT((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8); + LASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501); + LASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502); + LASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503); + LASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504); + LASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505); +} + -- 1.8.3.1