--- /dev/null
+ include/linux/dynlocks.h | 33 ++++++++++
+ lib/Makefile | 4 +-
+ lib/dynlocks.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 187 insertions(+), 2 deletions(-)
+
+Index: linux-2.4.20-rh/include/linux/dynlocks.h
+===================================================================
+--- linux-2.4.20-rh.orig/include/linux/dynlocks.h 2003-09-04 18:25:49.000000000 +0800
++++ linux-2.4.20-rh/include/linux/dynlocks.h 2003-09-04 18:25:49.000000000 +0800
+@@ -0,0 +1,33 @@
++#ifndef _LINUX_DYNLOCKS_H
++#define _LINUX_DYNLOCKS_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++struct dynlock_member {
++ struct list_head dl_list;
++ unsigned long dl_value; /* lock value */
++ int dl_refcount; /* number of users */
++ int dl_readers;
++ int dl_writers;
++ int dl_pid; /* pid of the exclusive holder */
++ wait_queue_head_t dl_wait;
++};
++
++/*
++ * a lockspace:
++ * - list of currently granted locks
++ * - spinlock protecting this list
++ */
++struct dynlock {
++ struct list_head dl_list;
++ spinlock_t dl_list_lock;
++};
++
++void dynlock_init(struct dynlock *dl);
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp);
++void dynlock_unlock(struct dynlock *dl, void *lock);
++
++
++#endif
++
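+
+A minimal usage sketch (illustration only, not part of the patch): a
+lockspace is initialized once, after which dynlock_lock()/dynlock_unlock()
+serialize work per value. The lockspace name, the helper, and the use of
+an inode number as the value are assumptions made for the example.
+
+	#include <linux/slab.h>
+	#include <linux/dynlocks.h>
+
+	static struct dynlock example_space;	/* dynlock_init() it once at setup */
+
+	static int do_serialized(unsigned long ino)
+	{
+		/* rw = 1 requests an exclusive lock on this value */
+		void *lock = dynlock_lock(&example_space, ino, 1, GFP_KERNEL);
+
+		if (lock == NULL)
+			return -ENOMEM;
+		/* critical section: serialized per inode number */
+		dynlock_unlock(&example_space, lock);
+		return 0;
+	}
+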
+Index: linux-2.4.20-rh/lib/dynlocks.c
+===================================================================
+--- linux-2.4.20-rh.orig/lib/dynlocks.c 2003-09-04 18:25:49.000000000 +0800
++++ linux-2.4.20-rh/lib/dynlocks.c 2003-09-04 18:25:49.000000000 +0800
+@@ -0,0 +1,152 @@
++/*
++ * Dynamic Locks
++ *
++ * a struct dynlock is a lockspace:
++ * one may request a lock (exclusive or shared) on any value
++ * in that lockspace
++ *
++ */
++
++#include <linux/dynlocks.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++
++/*
++ * dynlock_init
++ *
++ * initializes a lockspace
++ *
++ */
++void dynlock_init(struct dynlock *dl)
++{
++ spin_lock_init(&dl->dl_list_lock);
++ INIT_LIST_HEAD(&dl->dl_list);
++}
++
++/*
++ * dynlock_lock
++ *
++ * acquires a lock (exclusive or shared) in the specified lockspace.
++ * each lock in a lockspace is allocated separately, so the caller
++ * has to specify GFP flags for the allocation.
++ * the routine returns a pointer to the lock (NULL on allocation
++ * failure); this pointer is later passed to dynlock_unlock()
++ *
++ */
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp)
++{
++ struct dynlock_member *nhl = NULL;
++ struct dynlock_member *hl;
++ struct list_head *cur;
++
++repeat:
++ /* find requested lock in lockspace */
++ spin_lock(&dl->dl_list_lock);
++ list_for_each(cur, &dl->dl_list) {
++ hl = list_entry(cur, struct dynlock_member, dl_list);
++ if (hl->dl_value == value) {
++ /* lock is found */
++ if (nhl) {
++ /* someone else allocated this lock while
++ * we were creating our own copy of it,
++ * so drop the copy we allocated
++ */
++ kfree(nhl);
++ nhl = NULL;
++ }
++ hl->dl_refcount++;
++ goto found;
++ }
++ }
++ /* lock not found */
++ if (nhl) {
++ /* we have already allocated a lock; use it */
++ hl = nhl;
++ nhl = NULL;
++ list_add(&hl->dl_list, &dl->dl_list);
++ goto found;
++ }
++ spin_unlock(&dl->dl_list_lock);
++
++ /* lock not found and we haven't allocated one yet; allocate it */
++ nhl = kmalloc(sizeof(struct dynlock_member), gfp);
++ if (nhl == NULL)
++ return NULL;
++ nhl->dl_refcount = 1;
++ nhl->dl_value = value;
++ nhl->dl_readers = 0;
++ nhl->dl_writers = 0;
++ init_waitqueue_head(&nhl->dl_wait);
++
++ /* while the lock was being allocated, someone else may have
++ * allocated it and put it onto the list; check for that case
++ */
++ goto repeat;
++
++found:
++ if (rw) {
++ /* exclusive lock: the user doesn't want to share the lock at all.
++ * NOTE: one process may take the same lock several times;
++ * this functionality is useful for rename operations */
++ while ((hl->dl_writers && hl->dl_pid != current->pid) ||
++ hl->dl_readers) {
++ spin_unlock(&dl->dl_list_lock);
++ wait_event(hl->dl_wait,
++ hl->dl_writers == 0 && hl->dl_readers == 0);
++ spin_lock(&dl->dl_list_lock);
++ }
++ hl->dl_writers++;
++ } else {
++ /* shared lock: the user doesn't want to share the lock with writers */
++ while (hl->dl_writers) {
++ spin_unlock(&dl->dl_list_lock);
++ wait_event(hl->dl_wait, hl->dl_writers == 0);
++ spin_lock(&dl->dl_list_lock);
++ }
++ hl->dl_readers++;
++ }
++ hl->dl_pid = current->pid;
++ spin_unlock(&dl->dl_list_lock);
++
++ return hl;
++}
++
++/*
++ * dynlock_unlock
++ *
++ * releases a lock; the caller specifies the lockspace (dl) and the
++ * pointer to the lock structure returned by dynlock_lock()
++ *
++ */
++void dynlock_unlock(struct dynlock *dl, void *lock)
++{
++ struct dynlock_member *hl = lock;
++ int wakeup = 0, free;
++
++ spin_lock(&dl->dl_list_lock);
++ if (hl->dl_writers) {
++ hl->dl_writers--;
++ if (hl->dl_writers == 0)
++ wakeup = 1;
++ } else {
++ hl->dl_readers--;
++ if (hl->dl_readers == 0)
++ wakeup = 1;
++ }
++ if (wakeup) {
++ hl->dl_pid = 0;
++ wake_up(&hl->dl_wait);
++ }
++ free = (--(hl->dl_refcount) == 0); /* hl can't be read after unlock */
++ if (free)
++ list_del(&hl->dl_list);
++ spin_unlock(&dl->dl_list_lock);
++ if (free)
++ kfree(hl);
++}
++
++EXPORT_SYMBOL(dynlock_init);
++EXPORT_SYMBOL(dynlock_lock);
++EXPORT_SYMBOL(dynlock_unlock);
++
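+
+The write-lock recursion noted above is what makes rename-style callers
+work: if two names hash to the same value, the second dynlock_lock()
+call succeeds because dl_pid matches current->pid, instead of
+deadlocking. A sketch with hypothetical lockspace and hash variables
+(NULL checks omitted):
+
+	void *src, *dst;
+
+	src = dynlock_lock(&dir_space, src_hash, 1, GFP_KERNEL);
+	dst = dynlock_lock(&dir_space, dst_hash, 1, GFP_KERNEL);
+	/* if src_hash == dst_hash, both pointers refer to the same
+	 * dynlock_member with dl_refcount == 2 and dl_writers == 2 */
+
+	/* ... operate on both names ... */
+
+	dynlock_unlock(&dir_space, dst);
+	dynlock_unlock(&dir_space, src);
+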
+Index: linux-2.4.20-rh/lib/Makefile
+===================================================================
+--- linux-2.4.20-rh.orig/lib/Makefile 2002-11-29 07:53:15.000000000 +0800
++++ linux-2.4.20-rh/lib/Makefile 2003-09-04 18:27:26.000000000 +0800
+@@ -8,10 +8,10 @@
+
+ L_TARGET := lib.a
+
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
++export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o dynlocks.o
+
+ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
+- bust_spinlocks.o rbtree.o dump_stack.o
++ bust_spinlocks.o rbtree.o dump_stack.o dynlocks.o
+
+ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+ obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
--- /dev/null
+
+Index: linux-2.4.20-rh/fs/ext3/inode.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/ext3/inode.c 2003-09-04 18:01:41.000000000 +0800
++++ linux-2.4.20-rh/fs/ext3/inode.c 2003-09-04 18:18:54.000000000 +0800
+@@ -27,6 +27,7 @@
+ #include <linux/ext3_jbd.h>
+ #include <linux/jbd.h>
+ #include <linux/locks.h>
++#include <linux/iobuf.h>
+ #include <linux/smp_lock.h>
+ #include <linux/highuid.h>
+ #include <linux/quotaops.h>
+@@ -743,9 +744,9 @@
+ * The BKL may not be held on entry here. Be sure to take it early.
+ */
+
+-static int ext3_get_block_handle(handle_t *handle, struct inode *inode,
+- long iblock,
+- struct buffer_head *bh_result, int create)
++static int
++ext3_get_block_handle(handle_t *handle, struct inode *inode, long iblock,
++ struct buffer_head *bh_result, int create, int extend_disksize)
+ {
+ int err = -EIO;
+ int offsets[4];
+@@ -825,15 +826,18 @@
+ if (err)
+ goto cleanup;
+
+- new_size = inode->i_size;
+- /*
+- * This is not racy against ext3_truncate's modification of i_disksize
+- * because VM/VFS ensures that the file cannot be extended while
+- * truncate is in progress. It is racy between multiple parallel
+- * instances of get_block, but we have the BKL.
+- */
+- if (new_size > inode->u.ext3_i.i_disksize)
+- inode->u.ext3_i.i_disksize = new_size;
++ if (extend_disksize) {
++ /*
++ * This is not racy against ext3_truncate's modification of
++ * i_disksize because VM/VFS ensures that the file cannot be
++ * extended while truncate is in progress. It is racy between
++ * multiple parallel instances of get_block, but we have the BKL.
++ */
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ new_size = inode->i_size;
++ if (new_size > ei->i_disksize)
++ ei->i_disksize = new_size;
++ }
+
+ bh_result->b_state |= (1UL << BH_New);
+ goto got_it;
+@@ -861,7 +865,38 @@
+ handle = ext3_journal_current_handle();
+ J_ASSERT(handle != 0);
+ }
+- ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create);
++ ret = ext3_get_block_handle(handle, inode, iblock,
++ bh_result, create, 1);
++ return ret;
++}
++
++#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
++
++static int
++ext3_direct_io_get_block(struct inode *inode, long iblock,
++ struct buffer_head *bh_result, int create)
++{
++ handle_t *handle = journal_current_handle();
++ int ret = 0;
++
++ lock_kernel();
++ if (handle && handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
++ /*
++ * Getting low on buffer credits...
++ */
++ if (ext3_journal_extend(handle, DIO_CREDITS) != 0) {
++ /*
++ * Couldn't extend the transaction. Start a new one
++ */
++ ret = ext3_journal_restart(handle, DIO_CREDITS);
++ }
++ }
++ if (ret == 0)
++ ret = ext3_get_block_handle(handle, inode, iblock,
++ bh_result, create, 0);
++ if (ret == 0)
++ bh_result->b_size = (1 << inode->i_blkbits);
++ unlock_kernel();
+ return ret;
+ }
+
+@@ -879,7 +914,7 @@
+ dummy.b_state = 0;
+ dummy.b_blocknr = -1000;
+ buffer_trace_init(&dummy.b_history);
+- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create);
++ *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
+ if (!*errp && buffer_mapped(&dummy)) {
+ struct buffer_head *bh;
+ bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1387,6 +1422,67 @@
+ return journal_try_to_free_buffers(journal, page, wait);
+ }
+
++static int
++ext3_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
++ unsigned long blocknr, int blocksize)
++{
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ handle_t *handle = NULL;
++ int ret;
++ int orphan = 0;
++ loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* ugh */
++ ssize_t count = iobuf->length; /* ditto */
++
++ if (rw == WRITE) {
++ loff_t final_size = offset + count;
++
++ lock_kernel();
++ handle = ext3_journal_start(inode, DIO_CREDITS);
++ unlock_kernel();
++ if (IS_ERR(handle)) {
++ ret = PTR_ERR(handle);
++ goto out;
++ }
++ if (final_size > inode->i_size) {
++ lock_kernel();
++ ret = ext3_orphan_add(handle, inode);
++ unlock_kernel();
++ if (ret)
++ goto out_stop;
++ orphan = 1;
++ ei->i_disksize = inode->i_size;
++ }
++ }
++
++ ret = generic_direct_IO(rw, inode, iobuf, blocknr,
++ blocksize, ext3_direct_io_get_block);
++
++out_stop:
++ if (handle) {
++ int err;
++
++ lock_kernel();
++ if (orphan)
++ ext3_orphan_del(handle, inode);
++ if (orphan && ret > 0) {
++ loff_t end = offset + ret;
++ if (end > inode->i_size) {
++ ei->i_disksize = end;
++ inode->i_size = end;
++ err = ext3_mark_inode_dirty(handle, inode);
++ if (!ret)
++ ret = err;
++ }
++ }
++ err = ext3_journal_stop(handle, inode);
++ if (ret == 0)
++ ret = err;
++ unlock_kernel();
++ }
++out:
++ return ret;
++
++}
+
+ struct address_space_operations ext3_aops = {
+ readpage: ext3_readpage, /* BKL not held. Don't need */
+@@ -1397,6 +1493,7 @@
+ bmap: ext3_bmap, /* BKL held */
+ flushpage: ext3_flushpage, /* BKL not held. Don't need */
+ releasepage: ext3_releasepage, /* BKL not held. Don't need */
++ direct_IO: ext3_direct_IO, /* BKL not held. Don't need */
+ };
+
+ /*
+@@ -2970,7 +3067,7 @@
+ /* alloc blocks one by one */
+ for (i = 0; i < nblocks; i++) {
+ ret = ext3_get_block_handle(handle, inode, blocks[i],
+- &bh_tmp, 1);
++ &bh_tmp, 1, 1);
+ if (ret)
+ break;
+
+@@ -3030,7 +3127,7 @@
+ if (blocks[i] != 0)
+ continue;
+
+- rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1);
++ rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1);
+ if (rc) {
+ printk(KERN_INFO "ext3_map_inode_page: error reading "
+ "block %ld\n", iblock);
--- /dev/null
+ fs/inode.c | 1 +
+ fs/namei.c | 66 ++++++++++++++++++++++++++++++++++++++---------------
+ include/linux/fs.h | 11 ++++----
+ 3 files changed, 54 insertions(+), 24 deletions(-)
+
+Index: linux-2.4.20-rh/fs/namei.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/namei.c 2003-09-04 20:58:33.000000000 +0800
++++ linux-2.4.20-rh/fs/namei.c 2003-09-04 21:21:20.000000000 +0800
+@@ -101,6 +101,36 @@
+
+ }
+
++static void *lock_dir(struct inode *dir, struct qstr *name)
++{
++ unsigned long hash;
++
++ if (!IS_PDIROPS(dir)) {
++ down(&dir->i_sem);
++ return NULL;
++ }
++
++ /* OK, the fs understands parallel directory operations,
++ * so we acquire a lock on the hash of the requested
++ * filename in order to serialize concurrent operations
++ * on the same name -bzzz */
++
++ /* calculate name hash */
++ hash = full_name_hash(name->name, name->len);
++
++ /* lock this hash */
++ return dynlock_lock(&dir->i_dcache_lock, hash, 1, GFP_ATOMIC);
++}
++
++static void unlock_dir(struct inode *dir, void *lock)
++{
++ if (!IS_PDIROPS(dir)) {
++ up(&dir->i_sem);
++ return;
++ }
++ dynlock_unlock(&dir->i_dcache_lock, lock);
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+ * checking and hopefully speeding things up, we copy filenames to the
+ * kernel data space before using them..
+@@ -302,10 +332,10 @@
+ {
+ struct dentry * result;
+ struct inode *dir = parent->d_inode;
++ void *lock;
+
+ again:
+-
+- down(&dir->i_sem);
++ lock = lock_dir(dir, name);
+ /*
+ * First re-do the cached lookup just in case it was created
+ * while we waited for the directory semaphore..
+@@ -329,7 +359,7 @@
+ else
+ result = dentry;
+ }
+- up(&dir->i_sem);
++ unlock_dir(dir, lock);
+ return result;
+ }
+
+@@ -337,7 +367,7 @@
+ * Uhhuh! Nasty case: the cache was re-populated while
+ * we waited on the semaphore. Need to revalidate.
+ */
+- up(&dir->i_sem);
++ unlock_dir(dir, lock);
+ if (result->d_op && result->d_op->d_revalidate) {
+ if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
+ dput(result);
+@@ -1180,13 +1210,13 @@
+ goto exit;
+
+ dir = nd->dentry;
+- down(&dir->d_inode->i_sem);
++ nd->lock = lock_dir(dir->d_inode, &nd->last);
+ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+
+ do_last:
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd->lock);
+ goto exit;
+ }
+
+@@ -1195,7 +1225,7 @@
+ if (!dentry->d_inode) {
+ error = vfs_create_it(dir->d_inode, dentry,
+ mode & ~current->fs->umask, it);
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd->lock);
+ dput(nd->dentry);
+ nd->dentry = dentry;
+ if (error)
+@@ -1209,7 +1239,7 @@
+ /*
+ * It already exists.
+ */
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd->lock);
+
+ error = -EEXIST;
+ if (flag & O_EXCL)
+@@ -1362,7 +1392,7 @@
+ goto exit;
+ }
+ dir = nd->dentry;
+- down(&dir->d_inode->i_sem);
++ nd->lock = lock_dir(dir->d_inode, &nd->last);
+ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ putname(nd->last.name);
+ goto do_last;
+@@ -1380,7 +1410,7 @@
+ {
+ struct dentry *dentry;
+
+- down(&nd->dentry->d_inode->i_sem);
++ nd->lock = lock_dir(nd->dentry->d_inode, &nd->last);
+ dentry = ERR_PTR(-EEXIST);
+ if (nd->last_type != LAST_NORM)
+ goto fail;
+@@ -1469,7 +1499,7 @@
+ }
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1532,7 +1562,7 @@
+ mode & ~current->fs->umask);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1642,14 +1672,14 @@
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
+- down(&nd.dentry->d_inode->i_sem);
++ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ path_release(&nd);
+ exit:
+@@ -1708,7 +1738,7 @@
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
+- down(&nd.dentry->d_inode->i_sem);
++ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+@@ -1719,7 +1749,7 @@
+ exit2:
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ path_release(&nd);
+ exit:
+@@ -1789,7 +1819,7 @@
+ error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1881,7 +1911,7 @@
+ error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ dput(new_dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out_release:
+ path_release(&nd);
+ out:
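+
+The effect of the hunks above: two operations in the same S_PDIROPS
+directory serialize only if full_name_hash() maps their last components
+to the same value; a collision is harmless and merely serializes the two
+names on one dynlock_member. Note that lock_dir() passes GFP_ATOMIC, so
+dynlock_lock() can return NULL under memory pressure, which the callers
+above do not check. An illustration with arbitrary names:
+
+	/* lock_dir() keys the per-directory dynlock on this hash */
+	unsigned long h1 = full_name_hash((const unsigned char *)"alpha", 5);
+	unsigned long h2 = full_name_hash((const unsigned char *)"beta", 4);
+
+	/* h1 == h2 is unlikely: creates of "alpha" and "beta" in one
+	 * directory may then run in parallel rather than contending
+	 * on the directory's i_sem */
+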
+Index: linux-2.4.20-rh/include/linux/fs.h
+===================================================================
+--- linux-2.4.20-rh.orig/include/linux/fs.h 2003-09-04 20:59:14.000000000 +0800
++++ linux-2.4.20-rh/include/linux/fs.h 2003-09-04 21:03:46.000000000 +0800
+@@ -21,6 +21,7 @@
+ #include <linux/cache.h>
+ #include <linux/stddef.h>
+ #include <linux/string.h>
++#include <linux/dynlocks.h>
+
+ #include <asm/atomic.h>
+ #include <asm/bitops.h>
+@@ -136,6 +137,7 @@
+ #define S_IMMUTABLE 16 /* Immutable file */
+ #define S_DEAD 32 /* removed, but still open directory */
+ #define S_NOQUOTA 64 /* Inode is not counted to quota */
++#define S_PDIROPS 256 /* Parallel directory operations */
+
+ /*
+ * Note that nosuid etc flags are inode-specific: setting some file-system
+@@ -162,6 +164,7 @@
+ #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
+ #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME))
+ #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME)
++#define IS_PDIROPS(inode) __IS_FLG(inode, S_PDIROPS)
+
+ #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
+
+@@ -489,6 +492,7 @@
+ atomic_t i_writecount;
+ unsigned int i_attr_flags;
+ __u32 i_generation;
++ struct dynlock i_dcache_lock; /* for parallel directory ops */
+ union {
+ struct minix_inode_info minix_i;
+ struct ext2_inode_info ext2_i;
+@@ -708,6 +712,7 @@
+ unsigned int flags;
+ int last_type;
+ struct lookup_intent *intent;
++ void *lock;
+ };
+
+ /*
+@@ -1621,12 +1626,6 @@
+ return dget(dentry->d_parent);
+ }
+
+-static inline void unlock_dir(struct dentry *dir)
+-{
+- up(&dir->d_inode->i_sem);
+- dput(dir);
+-}
+-
+ /*
+ * Whee.. Deadlock country. Happily there are only two VFS
+ * operations that does this..
+Index: linux-2.4.20-rh/fs/inode.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/inode.c 2003-09-04 20:58:35.000000000 +0800
++++ linux-2.4.20-rh/fs/inode.c 2003-09-04 21:03:46.000000000 +0800
+@@ -121,6 +121,7 @@
+ mapping->host = inode;
+ mapping->gfp_mask = GFP_HIGHUSER;
+ inode->i_mapping = mapping;
++ dynlock_init(&inode->i_dcache_lock);
+ }
+ return inode;
+ }
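+
+Nothing in this patch sets S_PDIROPS itself. Since IS_PDIROPS() goes
+through __IS_FLG(), which in 2.4 tests the superblock's s_flags, a
+filesystem that implements per-name directory locking would presumably
+opt in at mount time; a hypothetical one-liner for its read_super():
+
+	sb->s_flags |= S_PDIROPS;	/* enable parallel directory ops */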