+++ /dev/null
- fs/ext3/ialloc.c | 3
- fs/ext3/inode.c | 3
- fs/ext3/namei.c | 582 +++++++++++++++++++++++++++++++++++++---------
- fs/ext3/super.c | 14 +
- include/linux/ext3_fs.h | 1
- include/linux/ext3_fs_i.h | 6
- 6 files changed, 500 insertions(+), 109 deletions(-)
-
-Index: linux-2.4.24/fs/ext3/namei.c
-===================================================================
---- linux-2.4.24.orig/fs/ext3/namei.c 2004-04-01 20:22:17.000000000 +0400
-+++ linux-2.4.24/fs/ext3/namei.c 2004-04-01 20:24:48.000000000 +0400
-@@ -51,6 +51,9 @@
- {
- struct buffer_head *bh;
-
-+ /* with parallel dir operations all appends
-+ * have to be serialized -bzzz */
-+ down(&EXT3_I(inode)->i_append_sem);
- *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-
- if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
-@@ -58,6 +61,8 @@
- EXT3_I(inode)->i_disksize = inode->i_size;
- ext3_journal_get_write_access(handle,bh);
- }
-+ up(&EXT3_I(inode)->i_append_sem);
-+
- return bh;
- }
-
-@@ -134,6 +139,8 @@
- struct buffer_head *bh;
- struct dx_entry *entries;
- struct dx_entry *at;
-+ unsigned long leaf;
-+ unsigned int curidx;
- };
-
- struct dx_map_entry
-@@ -142,6 +149,30 @@
- u32 offs;
- };
-
-+/* FIXME: this should be reworked using bb_spin_lock
-+ * introduced in -mm tree
-+ */
-+#define BH_DXLock 25
-+
-+static inline void dx_lock_bh(struct buffer_head volatile *bh)
-+{
-+#ifdef CONFIG_SMP
-+ while (test_and_set_bit(BH_DXLock, &bh->b_state)) {
-+ while (test_bit(BH_DXLock, &bh->b_state))
-+ cpu_relax();
-+ }
-+#endif
-+}
-+
-+static inline void dx_unlock_bh(struct buffer_head *bh)
-+{
-+#ifdef CONFIG_SMP
-+ smp_mb__before_clear_bit();
-+ clear_bit(BH_DXLock, &bh->b_state);
-+#endif
-+}
-+
-+
- #ifdef CONFIG_EXT3_INDEX
- static inline unsigned dx_get_block (struct dx_entry *entry);
- static void dx_set_block (struct dx_entry *entry, unsigned value);
-@@ -153,7 +184,7 @@
- static void dx_set_limit (struct dx_entry *entries, unsigned value);
- static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
- static unsigned dx_node_limit (struct inode *dir);
--static struct dx_frame *dx_probe(struct dentry *dentry,
-+static struct dx_frame *dx_probe(struct qstr *name,
- struct inode *dir,
- struct dx_hash_info *hinfo,
- struct dx_frame *frame,
-@@ -165,15 +196,18 @@
- static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
- struct dx_map_entry *offsets, int count);
- static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
--static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
-+static void dx_insert_block (struct inode *, struct dx_frame *, u32, u32, u32);
- static int ext3_htree_next_block(struct inode *dir, __u32 hash,
- struct dx_frame *frame,
- struct dx_frame *frames, int *err,
- __u32 *start_hash);
- static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-- struct ext3_dir_entry_2 **res_dir, int *err);
-+ struct ext3_dir_entry_2 **res_dir, int *err,
-+ int rwlock, void **lock);
- static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode);
-+static inline void *ext3_lock_htree(struct inode *, unsigned long, int);
-+static inline void ext3_unlock_htree(struct inode *, void *);
-
- /*
- * Future: use high four bits of block for coalesce-on-delete flags
-@@ -306,6 +340,94 @@
- #endif /* DX_DEBUG */
-
- /*
-+ * dx_find_position
-+ *
-+ * search position of specified hash in index
-+ *
-+ */
-+
-+struct dx_entry * dx_find_position(struct dx_entry * entries, u32 hash)
-+{
-+ struct dx_entry *p, *q, *m;
-+ int count;
-+
-+ count = dx_get_count(entries);
-+ p = entries + 1;
-+ q = entries + count - 1;
-+ while (p <= q)
-+ {
-+ m = p + (q - p)/2;
-+ if (dx_get_hash(m) > hash)
-+ q = m - 1;
-+ else
-+ p = m + 1;
-+ }
-+ return p - 1;
-+}
-+
-+/*
-+ * returns 1 if path is unchanged
-+ */
-+int dx_check_path(struct dx_frame *frame, u32 hash)
-+{
-+ struct dx_entry *p;
-+ int ret = 1;
-+
-+ dx_lock_bh(frame->bh);
-+ p = dx_find_position(frame->entries, hash);
-+ if (frame->leaf != dx_get_block(p))
-+ ret = 0;
-+ dx_unlock_bh(frame->bh);
-+
-+ return ret;
-+}
-+
-+/*
-+ * 0 - changed
-+ * 1 - hasn't changed
-+ */
-+static int
-+dx_check_full_path(struct dx_frame *frames, struct dx_hash_info *hinfo)
-+{
-+ struct dx_entry *p;
-+ struct dx_frame *frame = frames;
-+ u32 leaf;
-+
-+ /* check first level */
-+ dx_lock_bh(frame->bh);
-+ p = dx_find_position(frame->entries, hinfo->hash);
-+ leaf = dx_get_block(p);
-+ dx_unlock_bh(frame->bh);
-+
-+ if (leaf != frame->leaf)
-+ return 0;
-+
-+ /* is there 2nd level? */
-+ frame++;
-+ if (frame->bh == NULL)
-+ return 1;
-+
-+ /* check second level */
-+ dx_lock_bh(frame->bh);
-+
-+ /* probably 1st level got changed, check it */
-+ if (!dx_check_path(frames, hinfo->hash)) {
-+ /* path changed */
-+ dx_unlock_bh(frame->bh);
-+ return 0;
-+ }
-+
-+ p = dx_find_position(frame->entries, hinfo->hash);
-+ leaf = dx_get_block(p);
-+ dx_unlock_bh(frame->bh);
-+
-+ if (leaf != frame->leaf)
-+ return 0;
-+
-+ return 1;
-+}
-+
-+/*
- * Probe for a directory leaf block to search.
- *
- * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
-@@ -315,19 +437,20 @@
- * back to userspace.
- */
- static struct dx_frame *
--dx_probe(struct dentry *dentry, struct inode *dir,
-+dx_probe(struct qstr *name, struct inode *dir,
- struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
- {
-- unsigned count, indirect;
-- struct dx_entry *at, *entries, *p, *q, *m;
-+ unsigned indirect;
-+ struct dx_entry *at, *entries;
- struct dx_root *root;
- struct buffer_head *bh;
- struct dx_frame *frame = frame_in;
- u32 hash;
-+ unsigned int curidx;
-
- frame->bh = NULL;
-- if (dentry)
-- dir = dentry->d_parent->d_inode;
-+ frame[1].bh = NULL;
-+
- if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
- goto fail;
- root = (struct dx_root *) bh->b_data;
-@@ -343,8 +466,8 @@
- }
- hinfo->hash_version = root->info.hash_version;
- hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
-- if (dentry)
-- ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
-+ if (name)
-+ ext3fs_dirhash(name->name, name->len, hinfo);
- hash = hinfo->hash;
-
- if (root->info.unused_flags & 1) {
-@@ -356,7 +479,19 @@
- goto fail;
- }
-
-+repeat:
-+ curidx = 0;
-+ entries = (struct dx_entry *) (((char *)&root->info) +
-+ root->info.info_length);
-+ assert(dx_get_limit(entries) == dx_root_limit(dir,
-+ root->info.info_length));
-+ dxtrace (printk("Look up %x", hash));
-+ dx_lock_bh(bh);
-+ /* indirect must be initialized under bh lock because
-+ * 2nd level creation procedure may change it and dx_probe()
-+ * will suggest htree is still single-level -bzzz */
- if ((indirect = root->info.indirect_levels) > 1) {
-+ dx_unlock_bh(bh);
- ext3_warning(dir->i_sb, __FUNCTION__,
- "Unimplemented inode hash depth: %#06x",
- root->info.indirect_levels);
-@@ -364,56 +499,46 @@
- *err = ERR_BAD_DX_DIR;
- goto fail;
- }
--
-- entries = (struct dx_entry *) (((char *)&root->info) +
-- root->info.info_length);
-- assert(dx_get_limit(entries) == dx_root_limit(dir,
-- root->info.info_length));
-- dxtrace (printk("Look up %x", hash));
-+
- while (1)
- {
-- count = dx_get_count(entries);
-- assert (count && count <= dx_get_limit(entries));
-- p = entries + 1;
-- q = entries + count - 1;
-- while (p <= q)
-- {
-- m = p + (q - p)/2;
-- dxtrace(printk("."));
-- if (dx_get_hash(m) > hash)
-- q = m - 1;
-- else
-- p = m + 1;
-- }
--
-- if (0) // linear search cross check
-- {
-- unsigned n = count - 1;
-- at = entries;
-- while (n--)
-- {
-- dxtrace(printk(","));
-- if (dx_get_hash(++at) > hash)
-- {
-- at--;
-- break;
-- }
-- }
-- assert (at == p - 1);
-- }
--
-- at = p - 1;
-- dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
-+ at = dx_find_position(entries, hinfo->hash);
-+ dxtrace(printk(" %x->%u\n",
-+ at == entries? 0: dx_get_hash(at),
-+ dx_get_block(at)));
- frame->bh = bh;
- frame->entries = entries;
- frame->at = at;
-- if (!indirect--) return frame;
-- if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
-+ frame->curidx = curidx;
-+ frame->leaf = dx_get_block(at);
-+ if (!indirect--) {
-+ dx_unlock_bh(bh);
-+ return frame;
-+ }
-+
-+ /* step into next htree level */
-+ curidx = dx_get_block(at);
-+ dx_unlock_bh(bh);
-+ if (!(bh = ext3_bread (NULL,dir, frame->leaf, 0, err)))
- goto fail2;
-+
-+ dx_lock_bh(bh);
-+ /* splitting may change root index block and move
-+ * hash we're looking for into another index block
-+ * so, we have to check this situation and repeat
-+ * from begining if path got changed -bzzz */
-+ if (!dx_check_path(frame, hash)) {
-+ dx_unlock_bh(bh);
-+ bh = frame->bh;
-+ indirect++;
-+ goto repeat;
-+ }
-+
- at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
- frame++;
- }
-+ dx_unlock_bh(bh);
- fail2:
- while (frame >= frame_in) {
- brelse(frame->bh);
-@@ -427,8 +552,7 @@
- {
- if (frames[0].bh == NULL)
- return;
--
-- if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
-+ if (frames[1].bh != NULL)
- brelse(frames[1].bh);
- brelse(frames[0].bh);
- }
-@@ -470,8 +594,10 @@
- * nodes need to be read.
- */
- while (1) {
-- if (++(p->at) < p->entries + dx_get_count(p->entries))
-+ if (++(p->at) < p->entries + dx_get_count(p->entries)) {
-+ p->leaf = dx_get_block(p->at);
- break;
-+ }
- if (p == frames)
- return 0;
- num_frames++;
-@@ -497,13 +623,17 @@
- * block so no check is necessary
- */
- while (num_frames--) {
-- if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
-- 0, err)))
-+ u32 idx;
-+
-+ idx = p->leaf = dx_get_block(p->at);
-+ if (!(bh = ext3_bread(NULL, dir, idx, 0, err)))
- return -1; /* Failure */
- p++;
- brelse (p->bh);
- p->bh = bh;
- p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
-+ p->curidx = idx;
-+ p->leaf = dx_get_block(p->at);
- }
- return 1;
- }
-@@ -543,7 +673,7 @@
- dir = dir_file->f_dentry->d_inode;
- hinfo.hash = start_hash;
- hinfo.minor_hash = 0;
-- frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
-+ frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
- if (!frame)
- return err;
-
-@@ -625,7 +755,8 @@
- count++;
- }
- /* XXX: do we need to check rec_len == 0 case? -Chris */
-- de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
-+ de = (struct ext3_dir_entry_2 *)((char*)de +
-+ le16_to_cpu(de->rec_len));
- }
- return count;
- }
-@@ -658,7 +789,8 @@
- } while(more);
- }
-
--static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
-+static void dx_insert_block(struct inode *dir, struct dx_frame *frame,
-+ u32 hash, u32 block, u32 idx)
- {
- struct dx_entry *entries = frame->entries;
- struct dx_entry *old = frame->at, *new = old + 1;
-@@ -670,6 +802,7 @@
- dx_set_hash(new, hash);
- dx_set_block(new, block);
- dx_set_count(entries, count + 1);
-+
- }
- #endif
-
-@@ -752,7 +885,8 @@
-
-
- static struct buffer_head * ext3_find_entry (struct dentry *dentry,
-- struct ext3_dir_entry_2 ** res_dir)
-+ struct ext3_dir_entry_2 ** res_dir,
-+ int rwlock, void **lock)
- {
- struct super_block * sb;
- struct buffer_head * bh_use[NAMEI_RA_SIZE];
-@@ -768,6 +902,7 @@
- int namelen;
- const u8 *name;
- unsigned blocksize;
-+ int do_not_use_dx = 0;
-
- *res_dir = NULL;
- sb = dir->i_sb;
-@@ -776,9 +911,10 @@
- name = dentry->d_name.name;
- if (namelen > EXT3_NAME_LEN)
- return NULL;
-+repeat:
- #ifdef CONFIG_EXT3_INDEX
- if (is_dx(dir)) {
-- bh = ext3_dx_find_entry(dentry, res_dir, &err);
-+ bh = ext3_dx_find_entry(dentry, res_dir, &err, rwlock, lock);
- /*
- * On success, or if the error was file not found,
- * return. Otherwise, fall back to doing a search the
-@@ -787,8 +923,14 @@
- if (bh || (err != ERR_BAD_DX_DIR))
- return bh;
- dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
-+ do_not_use_dx = 1;
- }
- #endif
-+ *lock = ext3_lock_htree(dir, 0, rwlock);
-+ if (is_dx(dir) && !do_not_use_dx) {
-+ ext3_unlock_htree(dir, *lock);
-+ goto repeat;
-+ }
- nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
- start = EXT3_I(dir)->i_dir_start_lookup;
- if (start >= nblocks)
-@@ -859,12 +1001,17 @@
- /* Clean up the read-ahead blocks */
- for (; ra_ptr < ra_max; ra_ptr++)
- brelse (bh_use[ra_ptr]);
-+ if (!ret) {
-+ ext3_unlock_htree(dir, *lock);
-+ *lock = NULL;
-+ }
- return ret;
- }
-
- #ifdef CONFIG_EXT3_INDEX
- static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-- struct ext3_dir_entry_2 **res_dir, int *err)
-+ struct ext3_dir_entry_2 **res_dir, int *err,
-+ int rwlock, void **lock)
- {
- struct super_block * sb;
- struct dx_hash_info hinfo;
-@@ -879,11 +1026,22 @@
- struct inode *dir = dentry->d_parent->d_inode;
-
- sb = dir->i_sb;
-- if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
-+repeat:
-+ if (!(frame = dx_probe (&dentry->d_name, dir, &hinfo, frames, err)))
- return NULL;
-+
-+ *lock = ext3_lock_htree(dir, frame->leaf, rwlock);
-+ /* while locking leaf we just found may get splitted
-+ * so, we need another leaf. check this */
-+ if (!dx_check_full_path(frames, &hinfo)) {
-+ ext3_unlock_htree(dir, *lock);
-+ dx_release(frames);
-+ goto repeat;
-+ }
-+
- hash = hinfo.hash;
- do {
-- block = dx_get_block(frame->at);
-+ block = frame->leaf;
- if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
- goto errout;
- de = (struct ext3_dir_entry_2 *) bh->b_data;
-@@ -917,6 +1075,8 @@
- *err = -ENOENT;
- errout:
- dxtrace(printk("%s not found\n", name));
-+ ext3_unlock_htree(dir, *lock);
-+ *lock = NULL;
- dx_release (frames);
- return NULL;
- }
-@@ -927,6 +1087,7 @@
- struct inode * inode;
- struct ext3_dir_entry_2 * de;
- struct buffer_head * bh;
-+ void *lock = NULL;
-
- if (dentry->d_name.len > EXT3_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
-@@ -934,10 +1095,11 @@
- if (ext3_check_for_iopen(dir, dentry))
- return NULL;
-
-- bh = ext3_find_entry(dentry, &de);
-+ bh = ext3_find_entry(dentry, &de, 0, &lock);
- inode = NULL;
- if (bh) {
- unsigned long ino = le32_to_cpu(de->inode);
-+ ext3_unlock_htree(dir, lock);
- brelse (bh);
- inode = iget(dir->i_sb, ino);
-
-@@ -974,7 +1136,8 @@
- unsigned rec_len = 0;
-
- while (count--) {
-- struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
-+ struct ext3_dir_entry_2 *de =
-+ (struct ext3_dir_entry_2 *) (from + map->offs);
- rec_len = EXT3_DIR_REC_LEN(de->name_len);
- memcpy (to, de, rec_len);
- ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
-@@ -987,7 +1150,8 @@
-
- static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
- {
-- struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
-+ struct ext3_dir_entry_2 *next, *to, *prev;
-+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) base;
- unsigned rec_len = 0;
-
- prev = to = de;
-@@ -1009,7 +1173,8 @@
-
- static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
- struct buffer_head **bh,struct dx_frame *frame,
-- struct dx_hash_info *hinfo, int *error)
-+ struct dx_hash_info *hinfo, void **target,
-+ int *error)
- {
- unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
-@@ -1056,23 +1221,30 @@
- hash2 = map[split].hash;
- continued = hash2 == map[split - 1].hash;
- dxtrace(printk("Split block %i at %x, %i/%i\n",
-- dx_get_block(frame->at), hash2, split, count-split));
--
-+ frame->leaf, hash2, split, count-split));
-+
- /* Fancy dance to stay within two buffers */
- de2 = dx_move_dirents(data1, data2, map + split, count - split);
- de = dx_pack_dirents(data1,blocksize);
- de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
- de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
-- dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
-- dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
-+ dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data1, blocksize, 1));
-+ dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data2, blocksize, 1));
-
- /* Which block gets the new entry? */
-+ *target = NULL;
- if (hinfo->hash >= hash2)
- {
- swap(*bh, bh2);
- de = de2;
-- }
-- dx_insert_block (frame, hash2 + continued, newblock);
-+
-+ /* entry will be stored into new block
-+ * we have to lock it before add_dirent_to_buf */
-+ *target = ext3_lock_htree(dir, newblock, 1);
-+ }
-+ dx_lock_bh(frame->bh);
-+ dx_insert_block (dir, frame, hash2 + continued, newblock, frame->curidx);
-+ dx_unlock_bh(frame->bh);
- err = ext3_journal_dirty_metadata (handle, bh2);
- if (err)
- goto journal_error;
-@@ -1146,7 +1318,8 @@
- nlen = EXT3_DIR_REC_LEN(de->name_len);
- rlen = le16_to_cpu(de->rec_len);
- if (de->inode) {
-- struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
-+ struct ext3_dir_entry_2 *de1 =
-+ (struct ext3_dir_entry_2 *)((char *)de + nlen);
- de1->rec_len = cpu_to_le16(rlen - nlen);
- de->rec_len = cpu_to_le16(nlen);
- de = de1;
-@@ -1204,7 +1377,8 @@
- unsigned blocksize;
- struct dx_hash_info hinfo;
- u32 block;
--
-+ void *lock, *new_lock;
-+
- blocksize = dir->i_sb->s_blocksize;
- dxtrace(printk("Creating index\n"));
- retval = ext3_journal_get_write_access(handle, bh);
-@@ -1215,7 +1389,6 @@
- }
- root = (struct dx_root *) bh->b_data;
-
-- EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
- bh2 = ext3_append (handle, dir, &block, &retval);
- if (!(bh2)) {
- brelse(bh);
-@@ -1223,6 +1396,8 @@
- }
- data1 = bh2->b_data;
-
-+ lock = ext3_lock_htree(dir, block, 1);
-+
- /* The 0th block becomes the root, move the dirents out */
- de = (struct ext3_dir_entry_2 *) &root->dotdot;
- de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
-@@ -1252,13 +1427,25 @@
- frame->entries = entries;
- frame->at = entries;
- frame->bh = bh;
-+ frame->curidx = 0;
-+ frame->leaf = 0;
-+ frame[1].bh = NULL;
- bh = bh2;
-- de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-+ de = do_split(handle,dir, &bh, frame, &hinfo, &new_lock, &retval);
- dx_release (frames);
- if (!(de))
-- return retval;
-+ goto cleanup;
-+
-+ retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
-+cleanup:
-+ if (new_lock)
-+ ext3_unlock_htree(dir, new_lock);
-+ /* we mark directory indexed in order to
-+ * avoid races while htree being created -bzzz */
-+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
-+ ext3_unlock_htree(dir, lock);
-
-- return add_dirent_to_buf(handle, dentry, inode, de, bh);
-+ return retval;
- }
- #endif
-
-@@ -1287,11 +1474,13 @@
- unsigned blocksize;
- unsigned nlen, rlen;
- u32 block, blocks;
-+ void *lock;
-
- sb = dir->i_sb;
- blocksize = sb->s_blocksize;
- if (!dentry->d_name.len)
- return -EINVAL;
-+repeat:
- #ifdef CONFIG_EXT3_INDEX
- if (is_dx(dir)) {
- retval = ext3_dx_add_entry(handle, dentry, inode);
-@@ -1302,36 +1491,53 @@
- ext3_mark_inode_dirty(handle, dir);
- }
- #endif
-+ lock = ext3_lock_htree(dir, 0, 1);
-+ if (is_dx(dir)) {
-+ /* we got lock for block 0
-+ * probably previous holder of the lock
-+ * created htree -bzzz */
-+ ext3_unlock_htree(dir, lock);
-+ goto repeat;
-+ }
-+
- blocks = dir->i_size >> sb->s_blocksize_bits;
- for (block = 0, offset = 0; block < blocks; block++) {
- bh = ext3_bread(handle, dir, block, 0, &retval);
-- if(!bh)
-+ if(!bh) {
-+ ext3_unlock_htree(dir, lock);
- return retval;
-+ }
- retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
-- if (retval != -ENOSPC)
-+ if (retval != -ENOSPC) {
-+ ext3_unlock_htree(dir, lock);
- return retval;
-+ }
-
- #ifdef CONFIG_EXT3_INDEX
- if (blocks == 1 && !dx_fallback &&
-- EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
-- return make_indexed_dir(handle, dentry, inode, bh);
-+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) {
-+ retval = make_indexed_dir(handle, dentry, inode, bh);
-+ ext3_unlock_htree(dir, lock);
-+ return retval;
-+ }
- #endif
- brelse(bh);
- }
- bh = ext3_append(handle, dir, &block, &retval);
-- if (!bh)
-+ if (!bh) {
-+ ext3_unlock_htree(dir, lock);
- return retval;
-+ }
- de = (struct ext3_dir_entry_2 *) bh->b_data;
- de->inode = 0;
- de->rec_len = cpu_to_le16(rlen = blocksize);
- nlen = 0;
-- return add_dirent_to_buf(handle, dentry, inode, de, bh);
-+ retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
-+ ext3_unlock_htree(dir, lock);
-+ return retval;
- }
-
- #ifdef CONFIG_EXT3_INDEX
--/*
-- * Returns 0 for success, or a negative error value
-- */
- static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode)
- {
-@@ -1343,15 +1549,28 @@
- struct super_block * sb = dir->i_sb;
- struct ext3_dir_entry_2 *de;
- int err;
--
-- frame = dx_probe(dentry, 0, &hinfo, frames, &err);
-+ int curidx;
-+ void *idx_lock, *leaf_lock, *newleaf_lock;
-+
-+repeat:
-+ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
- if (!frame)
- return err;
-- entries = frame->entries;
-- at = frame->at;
-
-- if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
-+ /* we're going to chage leaf, so lock it first */
-+ leaf_lock = ext3_lock_htree(dir, frame->leaf, 1);
-+
-+ /* while locking leaf we just found may get splitted
-+ * so we need to check this */
-+ if (!dx_check_full_path(frames, &hinfo)) {
-+ ext3_unlock_htree(dir, leaf_lock);
-+ dx_release(frames);
-+ goto repeat;
-+ }
-+ if (!(bh = ext3_bread(handle,dir, frame->leaf, 0, &err))) {
-+ printk("can't ext3_bread(%d) = %d\n", (int) frame->leaf, err);
- goto cleanup;
-+ }
-
- BUFFER_TRACE(bh, "get_write_access");
- err = ext3_journal_get_write_access(handle, bh);
-@@ -1364,6 +1583,35 @@
- goto cleanup;
- }
-
-+ /* our leaf has no enough space. hence, we have to
-+ * split it. so lock index for this leaf first */
-+ curidx = frame->curidx;
-+ idx_lock = ext3_lock_htree(dir, curidx, 1);
-+
-+ /* now check did path get changed? */
-+ dx_release(frames);
-+
-+ frame = dx_probe(&dentry->d_name, dentry->d_parent->d_inode,
-+ &hinfo, frames, &err);
-+ if (!frame) {
-+ /* FIXME: error handling here */
-+ brelse(bh);
-+ ext3_unlock_htree(dir, idx_lock);
-+ return err;
-+ }
-+
-+ if (frame->curidx != curidx) {
-+ /* path has been changed. we have to drop old lock
-+ * and repeat */
-+ brelse(bh);
-+ ext3_unlock_htree(dir, idx_lock);
-+ ext3_unlock_htree(dir, leaf_lock);
-+ dx_release(frames);
-+ goto repeat;
-+ }
-+ entries = frame->entries;
-+ at = frame->at;
-+
- /* Block full, should compress but for now just split */
- dxtrace(printk("using %u of %u node entries\n",
- dx_get_count(entries), dx_get_limit(entries)));
-@@ -1375,7 +1623,8 @@
- struct dx_entry *entries2;
- struct dx_node *node2;
- struct buffer_head *bh2;
--
-+ void *nb_lock;
-+
- if (levels && (dx_get_count(frames->entries) ==
- dx_get_limit(frames->entries))) {
- ext3_warning(sb, __FUNCTION__,
-@@ -1386,6 +1635,7 @@
- bh2 = ext3_append (handle, dir, &newblock, &err);
- if (!(bh2))
- goto cleanup;
-+ nb_lock = ext3_lock_htree(dir, newblock, 1);
- node2 = (struct dx_node *)(bh2->b_data);
- entries2 = node2->entries;
- node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
-@@ -1397,27 +1647,73 @@
- if (levels) {
- unsigned icount1 = icount/2, icount2 = icount - icount1;
- unsigned hash2 = dx_get_hash(entries + icount1);
-+ void *ri_lock;
-+
-+ /* we have to protect root htree index against
-+ * another dx_add_entry() which would want to
-+ * split it too -bzzz */
-+ ri_lock = ext3_lock_htree(dir, 0, 1);
-+
-+ /* as root index block blocked we must repeat
-+ * searching for current position of our 2nd index -bzzz */
-+ dx_lock_bh(frame->bh);
-+ frames->at = dx_find_position(frames->entries, hinfo.hash);
-+ dx_unlock_bh(frame->bh);
-+
- dxtrace(printk("Split index %i/%i\n", icount1, icount2));
--
-- BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
-+
-+ BUFFER_TRACE(frame->bh, "get_write_access");
- err = ext3_journal_get_write_access(handle,
- frames[0].bh);
- if (err)
- goto journal_error;
--
-+
-+ /* copy index into new one */
- memcpy ((char *) entries2, (char *) (entries + icount1),
- icount2 * sizeof(struct dx_entry));
-- dx_set_count (entries, icount1);
- dx_set_count (entries2, icount2);
- dx_set_limit (entries2, dx_node_limit(dir));
-
- /* Which index block gets the new entry? */
- if (at - entries >= icount1) {
-+ /* unlock index we won't use */
-+ ext3_unlock_htree(dir, idx_lock);
-+ idx_lock = nb_lock;
- frame->at = at = at - entries - icount1 + entries2;
-- frame->entries = entries = entries2;
-+ frame->entries = entries2;
-+ frame->curidx = curidx = newblock;
- swap(frame->bh, bh2);
-+ } else {
-+ /* we'll use old index,so new one may be freed */
-+ ext3_unlock_htree(dir, nb_lock);
- }
-- dx_insert_block (frames + 0, hash2, newblock);
-+
-+ /* NOTE: very subtle piece of code
-+ * competing dx_probe() may find 2nd level index in root
-+ * index, then we insert new index here and set new count
-+ * in that 2nd level index. so, dx_probe() may see 2nd
-+ * level index w/o hash it looks for. the solution is
-+ * to check root index after we locked just founded 2nd
-+ * level index -bzzz */
-+ dx_lock_bh(frames[0].bh);
-+ dx_insert_block (dir, frames + 0, hash2, newblock, 0);
-+ dx_unlock_bh(frames[0].bh);
-+
-+ /* now old and new 2nd level index blocks contain
-+ * all pointers, so dx_probe() may find it in the both.
-+ * it's OK -bzzz */
-+
-+ dx_lock_bh(frame->bh);
-+ dx_set_count(entries, icount1);
-+ dx_unlock_bh(frame->bh);
-+
-+ /* now old 2nd level index block points to first half
-+ * of leafs. it's importand that dx_probe() must
-+ * check root index block for changes under
-+ * dx_lock_bh(frame->bh) -bzzz */
-+
-+ ext3_unlock_htree(dir, ri_lock);
-+
- dxtrace(dx_show_index ("node", frames[1].entries));
- dxtrace(dx_show_index ("node",
- ((struct dx_node *) bh2->b_data)->entries));
-@@ -1426,38 +1722,61 @@
- goto journal_error;
- brelse (bh2);
- } else {
-+ unsigned long leaf = frame->leaf;
-+
- dxtrace(printk("Creating second level index...\n"));
- memcpy((char *) entries2, (char *) entries,
- icount * sizeof(struct dx_entry));
- dx_set_limit(entries2, dx_node_limit(dir));
-
- /* Set up root */
-+ dx_lock_bh(frames[0].bh);
- dx_set_count(entries, 1);
- dx_set_block(entries + 0, newblock);
- ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-+ dx_unlock_bh(frames[0].bh);
-
- /* Add new access path frame */
- frame = frames + 1;
- frame->at = at = at - entries + entries2;
- frame->entries = entries = entries2;
- frame->bh = bh2;
-+ frame->curidx = newblock;
-+ frame->leaf = leaf;
- err = ext3_journal_get_write_access(handle,
- frame->bh);
- if (err)
- goto journal_error;
-+
-+ /* first level index was root. it's already initialized */
-+ /* we my unlock it now */
-+ ext3_unlock_htree(dir, idx_lock);
-+
-+ /* current index is just created 2nd level index */
-+ curidx = newblock;
-+ idx_lock = nb_lock;
- }
- ext3_journal_dirty_metadata(handle, frames[0].bh);
- }
-- de = do_split(handle, dir, &bh, frame, &hinfo, &err);
-+ de = do_split(handle, dir, &bh, frame, &hinfo, &newleaf_lock, &err);
- if (!de)
- goto cleanup;
-+
-+ /* index splitted */
-+ ext3_unlock_htree(dir, idx_lock);
-+
- err = add_dirent_to_buf(handle, dentry, inode, de, bh);
-+
-+ if (newleaf_lock)
-+ ext3_unlock_htree(dir, newleaf_lock);
-+
- bh = 0;
- goto cleanup;
-
- journal_error:
- ext3_std_error(dir->i_sb, err);
- cleanup:
-+ ext3_unlock_htree(dir, leaf_lock);
- if (bh)
- brelse(bh);
- dx_release(frames);
-@@ -1901,6 +2220,7 @@
- struct buffer_head * bh;
- struct ext3_dir_entry_2 * de;
- handle_t *handle;
-+ void *lock;
-
- handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
- if (IS_ERR(handle)) {
-@@ -1908,7 +2228,7 @@
- }
-
- retval = -ENOENT;
-- bh = ext3_find_entry (dentry, &de);
-+ bh = ext3_find_entry (dentry, &de, 1, &lock);
- if (!bh)
- goto end_rmdir;
-
-@@ -1919,14 +2239,19 @@
- DQUOT_INIT(inode);
-
- retval = -EIO;
-- if (le32_to_cpu(de->inode) != inode->i_ino)
-+ if (le32_to_cpu(de->inode) != inode->i_ino) {
-+ ext3_unlock_htree(dir, lock);
- goto end_rmdir;
-+ }
-
- retval = -ENOTEMPTY;
-- if (!empty_dir (inode))
-+ if (!empty_dir (inode)) {
-+ ext3_unlock_htree(dir, lock);
- goto end_rmdir;
-+ }
-
- retval = ext3_delete_entry(handle, dir, de, bh);
-+ ext3_unlock_htree(dir, lock);
- if (retval)
- goto end_rmdir;
- if (inode->i_nlink != 2)
-@@ -1985,6 +2310,7 @@
- struct buffer_head * bh;
- struct ext3_dir_entry_2 * de;
- handle_t *handle;
-+ void *lock;
-
- handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
- if (IS_ERR(handle)) {
-@@ -1995,7 +2321,7 @@
- handle->h_sync = 1;
-
- retval = -ENOENT;
-- bh = ext3_find_entry (dentry, &de);
-+ bh = ext3_find_entry (dentry, &de, 1, &lock);
- if (!bh)
- goto end_unlink;
-
-@@ -2003,8 +2329,10 @@
- DQUOT_INIT(inode);
-
- retval = -EIO;
-- if (le32_to_cpu(de->inode) != inode->i_ino)
-+ if (le32_to_cpu(de->inode) != inode->i_ino) {
-+ ext3_unlock_htree(dir, lock);
- goto end_unlink;
-+ }
-
- if (!inode->i_nlink) {
- ext3_warning (inode->i_sb, "ext3_unlink",
-@@ -2013,6 +2341,7 @@
- inode->i_nlink = 1;
- }
- retval = ext3_delete_entry(handle, dir, de, bh);
-+ ext3_unlock_htree(dir, lock);
- if (retval)
- goto end_unlink;
- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-@@ -2151,6 +2480,7 @@
- struct buffer_head * old_bh, * new_bh, * dir_bh;
- struct ext3_dir_entry_2 * old_de, * new_de;
- int retval;
-+ void *lock1 = NULL, *lock2 = NULL, *lock3 = NULL;
-
- old_bh = new_bh = dir_bh = NULL;
-
-@@ -2163,7 +2493,10 @@
- if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
- handle->h_sync = 1;
-
-- old_bh = ext3_find_entry (old_dentry, &old_de);
-+ if (old_dentry->d_parent == new_dentry->d_parent)
-+ down(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
-+
-+ old_bh = ext3_find_entry (old_dentry, &old_de, 1, &lock1 /* FIXME */);
- /*
- * Check for inode number is _not_ due to possible IO errors.
- * We might rmdir the source, keep it as pwd of some process
-@@ -2176,7 +2509,7 @@
- goto end_rename;
-
- new_inode = new_dentry->d_inode;
-- new_bh = ext3_find_entry (new_dentry, &new_de);
-+ new_bh = ext3_find_entry (new_dentry, &new_de, 1, &lock2 /* FIXME */);
- if (new_bh) {
- if (!new_inode) {
- brelse (new_bh);
-@@ -2239,7 +2572,7 @@
- struct buffer_head *old_bh2;
- struct ext3_dir_entry_2 *old_de2;
-
-- old_bh2 = ext3_find_entry(old_dentry, &old_de2);
-+ old_bh2 = ext3_find_entry(old_dentry, &old_de2, 1, &lock3 /* FIXME */);
- if (old_bh2) {
- retval = ext3_delete_entry(handle, old_dir,
- old_de2, old_bh2);
-@@ -2282,6 +2615,14 @@
- retval = 0;
-
- end_rename:
-+ if (lock1)
-+ ext3_unlock_htree(old_dentry->d_parent->d_inode, lock1);
-+ if (lock2)
-+ ext3_unlock_htree(new_dentry->d_parent->d_inode, lock2);
-+ if (lock3)
-+ ext3_unlock_htree(old_dentry->d_parent->d_inode, lock3);
-+ if (old_dentry->d_parent == new_dentry->d_parent)
-+ up(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
- brelse (dir_bh);
- brelse (old_bh);
- brelse (new_bh);
-@@ -2290,6 +2631,29 @@
- }
-
- /*
-+ * this locking primitives are used to protect parts
-+ * of dir's htree. protection unit is block: leaf or index
-+ */
-+static inline void *ext3_lock_htree(struct inode *dir,
-+ unsigned long value, int rwlock)
-+{
-+ void *lock;
-+
-+ if (!test_opt(dir->i_sb, PDIROPS))
-+ return NULL;
-+ lock = dynlock_lock(&EXT3_I(dir)->i_htree_lock, value, 1, GFP_KERNEL);
-+ return lock;
-+}
-+
-+static inline void ext3_unlock_htree(struct inode *dir,
-+ void *lock)
-+{
-+ if (!test_opt(dir->i_sb, PDIROPS) || !lock)
-+ return;
-+ dynlock_unlock(&EXT3_I(dir)->i_htree_lock, lock);
-+}
-+
-+/*
- * directories can handle most operations...
- */
- struct inode_operations ext3_dir_inode_operations = {
-Index: linux-2.4.24/fs/ext3/super.c
-===================================================================
---- linux-2.4.24.orig/fs/ext3/super.c 2004-04-01 20:22:17.000000000 +0400
-+++ linux-2.4.24/fs/ext3/super.c 2004-04-01 20:24:48.000000000 +0400
-@@ -702,6 +702,8 @@
- return 0;
- }
- }
-+ else if (!strcmp (this_char, "pdirops"))
-+ set_opt (sbi->s_mount_opt, PDIROPS);
- else if (!strcmp (this_char, "grpid") ||
- !strcmp (this_char, "bsdgroups"))
- set_opt (*mount_options, GRPID);
-@@ -728,6 +730,9 @@
- if (want_numeric(value, "sb", sb_block))
- return 0;
- }
-+ else if (!strcmp (this_char, "pdirops")) {
-+ set_opt (sbi->s_mount_opt, PDIROPS);
-+ }
- #ifdef CONFIG_JBD_DEBUG
- else if (!strcmp (this_char, "ro-after")) {
- unsigned long v;
-@@ -891,6 +896,10 @@
- ext3_check_inodes_bitmap (sb);
- }
- #endif
-+#ifdef S_PDIROPS
-+ if (test_opt (sb, PDIROPS))
-+ sb->s_flags |= S_PDIROPS;
-+#endif
- setup_ro_after(sb);
- return res;
- }
-@@ -1393,6 +1402,11 @@
- test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
- "writeback");
-
-+ if (test_opt(sb, PDIROPS)) {
-+ printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n");
-+ sb->s_flags |= S_PDIROPS;
-+ }
-+
- return sb;
-
- failed_mount3:
-Index: linux-2.4.24/fs/ext3/inode.c
-===================================================================
---- linux-2.4.24.orig/fs/ext3/inode.c 2004-04-01 20:22:18.000000000 +0400
-+++ linux-2.4.24/fs/ext3/inode.c 2004-04-01 20:24:48.000000000 +0400
-@@ -2239,6 +2239,9 @@
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &ext3_dir_inode_operations;
- inode->i_fop = &ext3_dir_operations;
-+ dynlock_init(&EXT3_I(inode)->i_htree_lock);
-+ sema_init(&EXT3_I(inode)->i_rename_sem, 1);
-+ sema_init(&EXT3_I(inode)->i_append_sem, 1);
- } else if (S_ISLNK(inode->i_mode)) {
- if (ext3_inode_is_fast_symlink(inode))
- inode->i_op = &ext3_fast_symlink_inode_operations;
-Index: linux-2.4.24/fs/ext3/ialloc.c
-===================================================================
---- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-04-01 20:22:17.000000000 +0400
-+++ linux-2.4.24/fs/ext3/ialloc.c 2004-04-01 20:24:48.000000000 +0400
-@@ -606,6 +606,9 @@
- return ERR_PTR(-EDQUOT);
- }
- ext3_debug ("allocating inode %lu\n", inode->i_ino);
-+ dynlock_init(&EXT3_I(inode)->i_htree_lock);
-+ sema_init(&EXT3_I(inode)->i_rename_sem, 1);
-+ sema_init(&EXT3_I(inode)->i_append_sem, 1);
- return inode;
-
- fail:
-Index: linux-2.4.24/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-04-01 20:22:17.000000000 +0400
-+++ linux-2.4.24/include/linux/ext3_fs.h 2004-04-01 20:24:48.000000000 +0400
-@@ -308,6 +308,7 @@
- /*
- * Mount flags
- */
-+#define EXT3_MOUNT_PDIROPS 0x800000/* Parallel dir operations */
- #define EXT3_MOUNT_CHECK 0x0001 /* Do mount-time checks */
- #define EXT3_MOUNT_GRPID 0x0004 /* Create files with directory's group */
- #define EXT3_MOUNT_DEBUG 0x0008 /* Some debugging messages */
-Index: linux-2.4.24/include/linux/ext3_fs_i.h
-===================================================================
---- linux-2.4.24.orig/include/linux/ext3_fs_i.h 2004-04-01 20:22:17.000000000 +0400
-+++ linux-2.4.24/include/linux/ext3_fs_i.h 2004-04-01 20:24:48.000000000 +0400
-@@ -17,6 +17,7 @@
- #define _LINUX_EXT3_FS_I
-
- #include <linux/rwsem.h>
-+#include <linux/dynlocks.h>
-
- /*
- * second extended file system inode data in memory
-@@ -76,6 +77,11 @@
- * by other means, so we have truncate_sem.
- */
- struct rw_semaphore truncate_sem;
-+
-+ /* following fields for parallel directory operations -bzzz */
-+ struct dynlock i_htree_lock;
-+ struct semaphore i_append_sem;
-+ struct semaphore i_rename_sem;
- };
-
- #endif /* _LINUX_EXT3_FS_I */