Whamcloud - gitweb
file xnu_types.h was initially added on branch b_port_step.
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-pdirops-2.4.18-chaos.patch
1  fs/ext3/ialloc.c          |    3 
2  fs/ext3/inode.c           |    3 
3  fs/ext3/namei.c           |  582 +++++++++++++++++++++++++++++++++++++---------
4  fs/ext3/super.c           |   14 +
5  include/linux/ext3_fs.h   |    1 
6  include/linux/ext3_fs_i.h |    6 
7  6 files changed, 500 insertions(+), 109 deletions(-)
8
9 --- linux-2.4.18/fs/ext3/namei.c~ext3-pdirops-2.4.18-chaos      2003-09-01 14:58:06.000000000 +0400
10 +++ linux-2.4.18-alexey/fs/ext3/namei.c 2003-09-02 11:46:15.000000000 +0400
11 @@ -52,6 +52,9 @@ static struct buffer_head *ext3_append(h
12  {
13         struct buffer_head *bh;
14  
15 +       /* with parallel dir operations all appends
16 +        * have to be serialized -bzzz */
17 +       down(&EXT3_I(inode)->i_append_sem);
18         *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
19  
20         if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
21 @@ -59,6 +62,8 @@ static struct buffer_head *ext3_append(h
22                 EXT3_I(inode)->i_disksize = inode->i_size;
23                 ext3_journal_get_write_access(handle,bh);
24         }
25 +       up(&EXT3_I(inode)->i_append_sem);
26 +       
27         return bh;
28  }
29  
30 @@ -135,6 +140,8 @@ struct dx_frame
31         struct buffer_head *bh;
32         struct dx_entry *entries;
33         struct dx_entry *at;
34 +       unsigned long leaf;
35 +       unsigned int curidx;
36  };
37  
38  struct dx_map_entry
39 @@ -143,6 +150,30 @@ struct dx_map_entry
40         u32 offs;
41  };
42  
43 +/* Bit spinlock kept in bh->b_state for htree index buffers.
44 + * FIXME: this should be reworked using bb_spin_lock
45 + * introduced in -mm tree */
46 +#define BH_DXLock      25
47 +
48 +static inline void dx_lock_bh(struct buffer_head volatile *bh)
49 +{
50 +#ifdef CONFIG_SMP
51 +        while (test_and_set_bit(BH_DXLock, &bh->b_state)) {
52 +                while (test_bit(BH_DXLock, &bh->b_state))
53 +                        cpu_relax();
54 +        }
55 +#endif
56 +}
57 +
58 +static inline void dx_unlock_bh(struct buffer_head *bh)
59 +{
60 +#ifdef CONFIG_SMP
61 +        smp_mb__before_clear_bit();
62 +        clear_bit(BH_DXLock, &bh->b_state);
63 +#endif
64 +}
65 +
66 +
67  #ifdef CONFIG_EXT3_INDEX
68  static inline unsigned dx_get_block (struct dx_entry *entry);
69  static void dx_set_block (struct dx_entry *entry, unsigned value);
70 @@ -154,7 +185,7 @@ static void dx_set_count (struct dx_entr
71  static void dx_set_limit (struct dx_entry *entries, unsigned value);
72  static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
73  static unsigned dx_node_limit (struct inode *dir);
74 -static struct dx_frame *dx_probe(struct dentry *dentry,
75 +static struct dx_frame *dx_probe(struct qstr *name,
76                                  struct inode *dir,
77                                  struct dx_hash_info *hinfo,
78                                  struct dx_frame *frame,
79 @@ -166,15 +197,18 @@ static void dx_sort_map(struct dx_map_en
80  static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
81                 struct dx_map_entry *offsets, int count);
82  static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
83 -static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
84 +static void dx_insert_block (struct inode *, struct dx_frame *, u32, u32, u32);
85  static int ext3_htree_next_block(struct inode *dir, __u32 hash,
86                                  struct dx_frame *frame,
87                                  struct dx_frame *frames, int *err,
88                                  __u32 *start_hash);
89  static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
90 -                      struct ext3_dir_entry_2 **res_dir, int *err);
91 +                      struct ext3_dir_entry_2 **res_dir, int *err,
92 +                      int rwlock, void **lock);
93  static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
94                              struct inode *inode);
95 +static inline void *ext3_lock_htree(struct inode *, unsigned long, int);
96 +static inline void ext3_unlock_htree(struct inode *, void *);
97  
98  /*
99   * Future: use high four bits of block for coalesce-on-delete flags
100 @@ -307,6 +341,94 @@ struct stats dx_show_entries(struct dx_h
101  #endif /* DX_DEBUG */
102  
103  /*
104 + * dx_find_position
105 + *
106 + * binary-search the index for the given hash: returns the last entry
107 + * in entries[1..count-1] whose hash is <= hash, or entries[0] if none
108 + */
109 +
110 +struct dx_entry * dx_find_position(struct dx_entry * entries, u32 hash)
111 +{
112 +       struct dx_entry *p, *q, *m;
113 +       int count;
114 +
115 +       count = dx_get_count(entries);
116 +       p = entries + 1;
117 +       q = entries + count - 1;
118 +       while (p <= q)
119 +       {
120 +               m = p + (q - p)/2;
121 +               if (dx_get_hash(m) > hash)
122 +                       q = m - 1;
123 +               else
124 +                       p = m + 1;
125 +       }
126 +       return p - 1;
127 +}
128 +
129 +/*
130 + * returns 1 if the index still maps hash to frame->leaf, 0 otherwise
131 + */
132 +int dx_check_path(struct dx_frame *frame, u32 hash)
133 +{
134 +       struct dx_entry *p;
135 +       int ret = 1;
136 +
137 +       dx_lock_bh(frame->bh);
138 +       p = dx_find_position(frame->entries, hash);
139 +       if (frame->leaf != dx_get_block(p))
140 +               ret = 0;
141 +       dx_unlock_bh(frame->bh);
142 +       
143 +       return ret;
144 +}
145 +
146 +/*
147 + * 0 - path recorded in frames has changed
148 + * 1 - path hasn't changed
149 + */
150 +static int
151 +dx_check_full_path(struct dx_frame *frames, struct dx_hash_info *hinfo)
152 +{
153 +       struct dx_entry *p;
154 +       struct dx_frame *frame = frames;
155 +       u32 leaf;
156 +
157 +       /* check first level */
158 +       dx_lock_bh(frame->bh);
159 +       p = dx_find_position(frame->entries, hinfo->hash);
160 +       leaf = dx_get_block(p);
161 +       dx_unlock_bh(frame->bh);
162 +       
163 +       if (leaf != frame->leaf) 
164 +               return 0;
165 +       
166 +       /* is there 2nd level? */
167 +       frame++;
168 +       if (frame->bh == NULL)
169 +               return 1;
170 +
171 +       /* check second level */
172 +       dx_lock_bh(frame->bh);
173 +
174 +       /* the 1st level may have changed meanwhile, re-check it */
175 +       if (!dx_check_path(frames, hinfo->hash)) {
176 +               /* path changed */
177 +               dx_unlock_bh(frame->bh);
178 +               return 0;
179 +       }
180 +
181 +       p = dx_find_position(frame->entries, hinfo->hash);
182 +       leaf = dx_get_block(p);
183 +       dx_unlock_bh(frame->bh);
184 +       
185 +       if (leaf != frame->leaf)
186 +               return 0;
187 +
188 +       return 1;
189 +}
190 +
191 +/*
192   * Probe for a directory leaf block to search.
193   *
194   * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
195 @@ -316,19 +438,20 @@ struct stats dx_show_entries(struct dx_h
196   * back to userspace.
197   */
198  static struct dx_frame *
199 -dx_probe(struct dentry *dentry, struct inode *dir,
200 +dx_probe(struct qstr *name, struct inode *dir,
201          struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
202  {
203 -       unsigned count, indirect;
204 -       struct dx_entry *at, *entries, *p, *q, *m;
205 +       unsigned indirect;
206 +       struct dx_entry *at, *entries;
207         struct dx_root *root;
208         struct buffer_head *bh;
209         struct dx_frame *frame = frame_in;
210         u32 hash;
211 +       unsigned int curidx;
212  
213         frame->bh = NULL;
214 -       if (dentry)
215 -               dir = dentry->d_parent->d_inode;
216 +       frame[1].bh = NULL;
217 +
218         if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
219                 goto fail;
220         root = (struct dx_root *) bh->b_data;
221 @@ -344,8 +467,8 @@ dx_probe(struct dentry *dentry, struct i
222         }
223         hinfo->hash_version = root->info.hash_version;
224         hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
225 -       if (dentry)
226 -               ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
227 +       if (name)
228 +               ext3fs_dirhash(name->name, name->len, hinfo);
229         hash = hinfo->hash;
230  
231         if (root->info.unused_flags & 1) {
232 @@ -357,7 +480,19 @@ dx_probe(struct dentry *dentry, struct i
233                 goto fail;
234         }
235  
236 +repeat:
237 +       curidx = 0;
238 +       entries = (struct dx_entry *) (((char *)&root->info) +
239 +                                      root->info.info_length);
240 +       assert(dx_get_limit(entries) == dx_root_limit(dir,
241 +                                                     root->info.info_length));
242 +       dxtrace (printk("Look up %x", hash));
243 +       dx_lock_bh(bh);
244 +       /* indirect must be read under the bh lock because creation
245 +        * of the 2nd level may change it, and dx_probe() would
246 +        * otherwise assume the htree is still single-level -bzzz */
247         if ((indirect = root->info.indirect_levels) > 1) {
248 +               dx_unlock_bh(bh);
249                 ext3_warning(dir->i_sb, __FUNCTION__,
250                              "Unimplemented inode hash depth: %#06x",
251                              root->info.indirect_levels);
252 @@ -365,56 +500,46 @@ dx_probe(struct dentry *dentry, struct i
253                 *err = ERR_BAD_DX_DIR;
254                 goto fail;
255         }
256 -
257 -       entries = (struct dx_entry *) (((char *)&root->info) +
258 -                                      root->info.info_length);
259 -       assert(dx_get_limit(entries) == dx_root_limit(dir,
260 -                                                     root->info.info_length));
261 -       dxtrace (printk("Look up %x", hash));
262 +       
263         while (1)
264         {
265 -               count = dx_get_count(entries);
266 -               assert (count && count <= dx_get_limit(entries));
267 -               p = entries + 1;
268 -               q = entries + count - 1;
269 -               while (p <= q)
270 -               {
271 -                       m = p + (q - p)/2;
272 -                       dxtrace(printk("."));
273 -                       if (dx_get_hash(m) > hash)
274 -                               q = m - 1;
275 -                       else
276 -                               p = m + 1;
277 -               }
278 -
279 -               if (0) // linear search cross check
280 -               {
281 -                       unsigned n = count - 1;
282 -                       at = entries;
283 -                       while (n--)
284 -                       {
285 -                               dxtrace(printk(","));
286 -                               if (dx_get_hash(++at) > hash)
287 -                               {
288 -                                       at--;
289 -                                       break;
290 -                               }
291 -                       }
292 -                       assert (at == p - 1);
293 -               }
294 -
295 -               at = p - 1;
296 -               dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
297 +               at = dx_find_position(entries, hinfo->hash);
298 +               dxtrace(printk(" %x->%u\n",
299 +                               at == entries? 0: dx_get_hash(at),
300 +                               dx_get_block(at)));
301                 frame->bh = bh;
302                 frame->entries = entries;
303                 frame->at = at;
304 -               if (!indirect--) return frame;
305 -               if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
306 +               frame->curidx = curidx;
307 +               frame->leaf = dx_get_block(at);
308 +               if (!indirect--) {
309 +                       dx_unlock_bh(bh);
310 +                       return frame;
311 +               }
312 +               
313 +               /* step into next htree level */
314 +               curidx = dx_get_block(at);
315 +               dx_unlock_bh(bh);
316 +               if (!(bh = ext3_bread (NULL,dir, frame->leaf, 0, err)))
317                         goto fail2;
318 +               
319 +               dx_lock_bh(bh);
320 +               /* splitting may change the root index block and move
321 +                * the hash we're looking for into another index block,
322 +                * so we have to check for this and repeat from the
323 +                * beginning if the path has changed -bzzz */
324 +               if (!dx_check_path(frame, hash)) {
325 +                       dx_unlock_bh(bh);
326 +                       bh = frame->bh;
327 +                       indirect++;
328 +                       goto repeat;
329 +               }
330 +               
331                 at = entries = ((struct dx_node *) bh->b_data)->entries;
332                 assert (dx_get_limit(entries) == dx_node_limit (dir));
333                 frame++;
334         }
335 +       dx_unlock_bh(bh);
336  fail2:
337         while (frame >= frame_in) {
338                 brelse(frame->bh);
339 @@ -428,8 +553,7 @@ static void dx_release (struct dx_frame 
340  {
341         if (frames[0].bh == NULL)
342                 return;
343 -
344 -       if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
345 +       if (frames[1].bh != NULL)
346                 brelse(frames[1].bh);
347         brelse(frames[0].bh);
348  }
349 @@ -471,8 +595,10 @@ static int ext3_htree_next_block(struct 
350          * nodes need to be read.
351          */
352         while (1) {
353 -               if (++(p->at) < p->entries + dx_get_count(p->entries))
354 +               if (++(p->at) < p->entries + dx_get_count(p->entries)) {
355 +                       p->leaf = dx_get_block(p->at);
356                         break;
357 +               }
358                 if (p == frames)
359                         return 0;
360                 num_frames++;
361 @@ -498,13 +624,17 @@ static int ext3_htree_next_block(struct 
362          * block so no check is necessary
363          */
364         while (num_frames--) {
365 -               if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
366 -                                     0, err)))
367 +               u32 idx;
368 +               
369 +               idx = p->leaf = dx_get_block(p->at);
370 +               if (!(bh = ext3_bread(NULL, dir, idx, 0, err)))
371                         return -1; /* Failure */
372                 p++;
373                 brelse (p->bh);
374                 p->bh = bh;
375                 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
376 +               p->curidx = idx;
377 +               p->leaf = dx_get_block(p->at);
378         }
379         return 1;
380  }
381 @@ -544,7 +674,7 @@ int ext3_htree_fill_tree(struct file *di
382         dir = dir_file->f_dentry->d_inode;
383         hinfo.hash = start_hash;
384         hinfo.minor_hash = 0;
385 -       frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
386 +       frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
387         if (!frame)
388                 return err;
389  
390 @@ -626,7 +756,8 @@ static int dx_make_map (struct ext3_dir_
391                         count++;
392                 }
393                 /* XXX: do we need to check rec_len == 0 case? -Chris */
394 -               de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
395 +               de = (struct ext3_dir_entry_2 *)((char*)de +
396 +                               le16_to_cpu(de->rec_len));
397         }
398         return count;
399  }
400 @@ -659,7 +790,8 @@ static void dx_sort_map (struct dx_map_e
401         } while(more);
402  }
403  
404 -static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
405 +static void dx_insert_block(struct inode *dir, struct dx_frame *frame,
406 +                       u32 hash, u32 block, u32 idx)
407  {
408         struct dx_entry *entries = frame->entries;
409         struct dx_entry *old = frame->at, *new = old + 1;
410 @@ -671,6 +803,7 @@ static void dx_insert_block(struct dx_fr
411         dx_set_hash(new, hash);
412         dx_set_block(new, block);
413         dx_set_count(entries, count + 1);
414 +       
415  }
416  #endif
417  
418 @@ -753,7 +886,8 @@ static int inline search_dirblock(struct
419  
420         
421  static struct buffer_head * ext3_find_entry (struct dentry *dentry,
422 -                                       struct ext3_dir_entry_2 ** res_dir)
423 +                                       struct ext3_dir_entry_2 ** res_dir,
424 +                                       int rwlock, void **lock)
425  {
426         struct super_block * sb;
427         struct buffer_head * bh_use[NAMEI_RA_SIZE];
428 @@ -769,6 +903,7 @@ static struct buffer_head * ext3_find_en
429         int namelen;
430         const u8 *name;
431         unsigned blocksize;
432 +       int do_not_use_dx = 0;
433  
434         *res_dir = NULL;
435         sb = dir->i_sb;
436 @@ -777,9 +912,10 @@ static struct buffer_head * ext3_find_en
437         name = dentry->d_name.name;
438         if (namelen > EXT3_NAME_LEN)
439                 return NULL;
440 +repeat:
441  #ifdef CONFIG_EXT3_INDEX
442         if (is_dx(dir)) {
443 -               bh = ext3_dx_find_entry(dentry, res_dir, &err);
444 +               bh = ext3_dx_find_entry(dentry, res_dir, &err, rwlock, lock);
445                 /*
446                  * On success, or if the error was file not found,
447                  * return.  Otherwise, fall back to doing a search the
448 @@ -788,8 +924,14 @@ static struct buffer_head * ext3_find_en
449                 if (bh || (err != ERR_BAD_DX_DIR))
450                         return bh;
451                 dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
452 +               do_not_use_dx = 1;
453         }
454  #endif
455 +       *lock = ext3_lock_htree(dir, 0, rwlock);
456 +       if (is_dx(dir) && !do_not_use_dx) {
457 +               ext3_unlock_htree(dir, *lock);
458 +               goto repeat;
459 +       }
460         nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
461         start = EXT3_I(dir)->i_dir_start_lookup;
462         if (start >= nblocks)
463 @@ -861,12 +1003,17 @@ cleanup_and_exit:
464         /* Clean up the read-ahead blocks */
465         for (; ra_ptr < ra_max; ra_ptr++)
466                 brelse (bh_use[ra_ptr]);
467 +       if (!ret) {
468 +               ext3_unlock_htree(dir, *lock);
469 +               *lock = NULL;
470 +       }
471         return ret;
472  }
473  
474  #ifdef CONFIG_EXT3_INDEX
475  static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
476 -                      struct ext3_dir_entry_2 **res_dir, int *err)
477 +                      struct ext3_dir_entry_2 **res_dir, int *err,
478 +                      int rwlock, void **lock)
479  {
480         struct super_block * sb;
481         struct dx_hash_info     hinfo;
482 @@ -881,11 +1028,22 @@ static struct buffer_head * ext3_dx_find
483         struct inode *dir = dentry->d_parent->d_inode;
484         
485         sb = dir->i_sb;
486 -       if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
487 +repeat:
488 +       if (!(frame = dx_probe (&dentry->d_name, dir, &hinfo, frames, err)))
489                 return NULL;
490 +       
491 +       *lock = ext3_lock_htree(dir, frame->leaf, rwlock);
492 +       /* the leaf we just found may have been split while we were
493 +        * locking it, so we may need another leaf. check this */
494 +       if (!dx_check_full_path(frames, &hinfo)) {
495 +               ext3_unlock_htree(dir, *lock);
496 +               dx_release(frames);
497 +               goto repeat;
498 +       }
499 +
500         hash = hinfo.hash;
501         do {
502 -               block = dx_get_block(frame->at);
503 +               block = frame->leaf;
504                 if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
505                         goto errout;
506                 de = (struct ext3_dir_entry_2 *) bh->b_data;
507 @@ -919,6 +1077,8 @@ static struct buffer_head * ext3_dx_find
508         *err = -ENOENT;
509  errout:
510         dxtrace(printk("%s not found\n", name));
511 +       ext3_unlock_htree(dir, *lock);
512 +       *lock = NULL;
513         dx_release (frames);
514         return NULL;
515  }
516 @@ -931,6 +1091,7 @@ static struct dentry *ext3_lookup(struct
517         struct ext3_dir_entry_2 * de;
518         struct buffer_head * bh;
519         struct dentry *alternate = NULL;
520 +       void *lock = NULL;
521  
522         if (dentry->d_name.len > EXT3_NAME_LEN)
523                 return ERR_PTR(-ENAMETOOLONG);
524 @@ -938,10 +1099,11 @@ static struct dentry *ext3_lookup(struct
525         if (ext3_check_for_iopen(dir, dentry))
526                 return NULL;
527  
528 -       bh = ext3_find_entry(dentry, &de);
529 +       bh = ext3_find_entry(dentry, &de, 0, &lock);
530         inode = NULL;
531         if (bh) {
532                 unsigned long ino = le32_to_cpu(de->inode);
533 +               ext3_unlock_htree(dir, lock);
534                 brelse (bh);
535                 inode = iget(dir->i_sb, ino);
536  
537 @@ -984,7 +1146,8 @@ dx_move_dirents(char *from, char *to, st
538         unsigned rec_len = 0;
539  
540         while (count--) {
541 -               struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
542 +               struct ext3_dir_entry_2 *de =
543 +                       (struct ext3_dir_entry_2 *) (from + map->offs);
544                 rec_len = EXT3_DIR_REC_LEN(de->name_len);
545                 memcpy (to, de, rec_len);
546                 ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
547 @@ -997,7 +1160,8 @@ dx_move_dirents(char *from, char *to, st
548  
549  static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
550  {
551 -       struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
552 +       struct ext3_dir_entry_2 *next, *to, *prev;
553 +       struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) base;
554         unsigned rec_len = 0;
555  
556         prev = to = de;
557 @@ -1019,7 +1183,8 @@ static struct ext3_dir_entry_2* dx_pack_
558  
559  static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
560                         struct buffer_head **bh,struct dx_frame *frame,
561 -                       struct dx_hash_info *hinfo, int *error)
562 +                       struct dx_hash_info *hinfo, void **target,
563 +                       int *error)
564  {
565         unsigned blocksize = dir->i_sb->s_blocksize;
566         unsigned count, continued;
567 @@ -1066,23 +1231,30 @@ static struct ext3_dir_entry_2 *do_split
568         hash2 = map[split].hash;
569         continued = hash2 == map[split - 1].hash;
570         dxtrace(printk("Split block %i at %x, %i/%i\n",
571 -               dx_get_block(frame->at), hash2, split, count-split));
572 -
573 +               frame->leaf, hash2, split, count-split));
574 +       
575         /* Fancy dance to stay within two buffers */
576         de2 = dx_move_dirents(data1, data2, map + split, count - split);
577         de = dx_pack_dirents(data1,blocksize);
578         de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
579         de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
580 -       dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
581 -       dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
582 +       dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data1, blocksize, 1));
583 +       dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data2, blocksize, 1));
584  
585         /* Which block gets the new entry? */
586 +       *target = NULL;
587         if (hinfo->hash >= hash2)
588         {
589                 swap(*bh, bh2);
590                 de = de2;
591 -       }
592 -       dx_insert_block (frame, hash2 + continued, newblock);
593 +
594 +               /* the entry will be stored in the new block, so
595 +                * we have to lock it before add_dirent_to_buf */
596 +               *target = ext3_lock_htree(dir, newblock, 1);
597 +       }
598 +       dx_lock_bh(frame->bh);
599 +       dx_insert_block (dir, frame, hash2 + continued, newblock, frame->curidx);
600 +       dx_unlock_bh(frame->bh);
601         err = ext3_journal_dirty_metadata (handle, bh2);
602         if (err)
603                 goto journal_error;
604 @@ -1156,7 +1328,8 @@ static int add_dirent_to_buf(handle_t *h
605         nlen = EXT3_DIR_REC_LEN(de->name_len);
606         rlen = le16_to_cpu(de->rec_len);
607         if (de->inode) {
608 -               struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
609 +               struct ext3_dir_entry_2 *de1 =
610 +                       (struct ext3_dir_entry_2 *)((char *)de + nlen);
611                 de1->rec_len = cpu_to_le16(rlen - nlen);
612                 de->rec_len = cpu_to_le16(nlen);
613                 de = de1;
614 @@ -1214,7 +1387,8 @@ static int make_indexed_dir(handle_t *ha
615         unsigned        blocksize;
616         struct dx_hash_info hinfo;
617         u32             block;
618 -               
619 +       void            *lock, *new_lock;
620 +
621         blocksize =  dir->i_sb->s_blocksize;
622         dxtrace(printk("Creating index\n"));
623         retval = ext3_journal_get_write_access(handle, bh);
624 @@ -1225,7 +1399,6 @@ static int make_indexed_dir(handle_t *ha
625         }
626         root = (struct dx_root *) bh->b_data;
627                 
628 -       EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
629         bh2 = ext3_append (handle, dir, &block, &retval);
630         if (!(bh2)) {
631                 brelse(bh);
632 @@ -1233,6 +1406,8 @@ static int make_indexed_dir(handle_t *ha
633         }
634         data1 = bh2->b_data;
635  
636 +       lock = ext3_lock_htree(dir, block, 1);
637 +
638         /* The 0th block becomes the root, move the dirents out */
639         de = (struct ext3_dir_entry_2 *) &root->info;
640         len = ((char *) root) + blocksize - (char *) de;
641 @@ -1261,13 +1436,25 @@ static int make_indexed_dir(handle_t *ha
642         frame->entries = entries;
643         frame->at = entries;
644         frame->bh = bh;
645 +       frame->curidx = 0;
646 +       frame->leaf = 0;
647 +       frame[1].bh = NULL;
648         bh = bh2;
649 -       de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
650 +       de = do_split(handle,dir, &bh, frame, &hinfo, &new_lock, &retval);
651         dx_release (frames);
652         if (!(de))
653 -               return retval;
654 +               goto cleanup;
655 +
656 +       retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
657 +cleanup:
658 +       if (new_lock)
659 +               ext3_unlock_htree(dir, new_lock);
660 +       /* we mark the directory indexed only now, in order to
661 +        * avoid races while the htree is being created -bzzz */
662 +       EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
663 +       ext3_unlock_htree(dir, lock);
664  
665 -       return add_dirent_to_buf(handle, dentry, inode, de, bh);
666 +       return retval;
667  }
668  #endif
669  
670 @@ -1296,11 +1483,13 @@ static int ext3_add_entry (handle_t *han
671         unsigned blocksize;
672         unsigned nlen, rlen;
673         u32 block, blocks;
674 +       void *lock;
675  
676         sb = dir->i_sb;
677         blocksize = sb->s_blocksize;
678         if (!dentry->d_name.len)
679                 return -EINVAL;
680 +repeat:
681  #ifdef CONFIG_EXT3_INDEX
682         if (is_dx(dir)) {
683                 retval = ext3_dx_add_entry(handle, dentry, inode);
684 @@ -1311,36 +1500,53 @@ static int ext3_add_entry (handle_t *han
685                 ext3_mark_inode_dirty(handle, dir);
686         }
687  #endif
688 +       lock = ext3_lock_htree(dir, 0, 1);
689 +       if (is_dx(dir)) {
690 +               /* we got the lock for block 0;
691 +                * the previous holder of the lock
692 +                * probably created the htree -bzzz */
693 +               ext3_unlock_htree(dir, lock);
694 +               goto repeat;
695 +       }
696 +       
697         blocks = dir->i_size >> sb->s_blocksize_bits;
698         for (block = 0, offset = 0; block < blocks; block++) {
699                 bh = ext3_bread(handle, dir, block, 0, &retval);
700 -               if(!bh)
701 +               if(!bh) {
702 +                       ext3_unlock_htree(dir, lock);
703                         return retval;
704 +               }
705                 retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
706 -               if (retval != -ENOSPC)
707 +               if (retval != -ENOSPC) {
708 +                       ext3_unlock_htree(dir, lock);
709                         return retval;
710 +               }
711  
712  #ifdef CONFIG_EXT3_INDEX
713                 if (blocks == 1 && !dx_fallback &&
714 -                   EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
715 -                       return make_indexed_dir(handle, dentry, inode, bh);
716 +                   EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) {
717 +                       retval = make_indexed_dir(handle, dentry, inode, bh);
718 +                       ext3_unlock_htree(dir, lock);
719 +                       return retval;
720 +               }
721  #endif
722                 brelse(bh);
723         }
724         bh = ext3_append(handle, dir, &block, &retval);
725 -       if (!bh)
726 +       if (!bh) {
727 +               ext3_unlock_htree(dir, lock);
728                 return retval;
729 +       }
730         de = (struct ext3_dir_entry_2 *) bh->b_data;
731         de->inode = 0;
732         de->rec_len = cpu_to_le16(rlen = blocksize);
733         nlen = 0;
734 -       return add_dirent_to_buf(handle, dentry, inode, de, bh);
735 +       retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
736 +       ext3_unlock_htree(dir, lock);
737 +       return retval;
738  }
739  
740  #ifdef CONFIG_EXT3_INDEX
741 -/*
742 - * Returns 0 for success, or a negative error value
743 - */
744  static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
745                              struct inode *inode)
746  {
747 @@ -1352,15 +1558,28 @@ static int ext3_dx_add_entry(handle_t *h
748         struct super_block * sb = dir->i_sb;
749         struct ext3_dir_entry_2 *de;
750         int err;
751 -
752 -       frame = dx_probe(dentry, 0, &hinfo, frames, &err);
753 +       int curidx;
754 +       void *idx_lock, *leaf_lock, *newleaf_lock;
755 +       
756 +repeat:
757 +       frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
758         if (!frame)
759                 return err;
760 -       entries = frame->entries;
761 -       at = frame->at;
762  
763 -       if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
764 +       /* we're going to change the leaf, so lock it first */
765 +       leaf_lock = ext3_lock_htree(dir, frame->leaf, 1);
766 +
767 +       /* the leaf we just found may have been split while we were
768 +        * locking it, so we need to check this */
769 +       if (!dx_check_full_path(frames, &hinfo)) {
770 +               ext3_unlock_htree(dir, leaf_lock);
771 +               dx_release(frames);
772 +               goto repeat;
773 +       }
774 +       if (!(bh = ext3_bread(handle,dir, frame->leaf, 0, &err))) {
775 +               printk("can't ext3_bread(%d) = %d\n", (int) frame->leaf, err);
776                 goto cleanup;
777 +       }
778  
779         BUFFER_TRACE(bh, "get_write_access");
780         err = ext3_journal_get_write_access(handle, bh);
781 @@ -1373,6 +1592,35 @@ static int ext3_dx_add_entry(handle_t *h
782                 goto cleanup;
783         }
784  
785 +       /* our leaf does not have enough space, so we have to
786 +        * split it; lock the index for this leaf first */
787 +       curidx = frame->curidx;
788 +       idx_lock = ext3_lock_htree(dir, curidx, 1);
789 +
790 +       /* now check whether the path has changed */
791 +       dx_release(frames);
792 +
793 +       frame = dx_probe(&dentry->d_name, dentry->d_parent->d_inode,
794 +                       &hinfo, frames, &err);
795 +       if (!frame) {
796 +               /* FIXME: error handling here */
797 +               brelse(bh);
798 +               ext3_unlock_htree(dir, idx_lock);
799 +               return err;
800 +       }
801 +       
802 +       if (frame->curidx != curidx) {
803 +               /* path has been changed. we have to drop old lock
804 +                * and repeat */
805 +               brelse(bh);
806 +               ext3_unlock_htree(dir, idx_lock);
807 +               ext3_unlock_htree(dir, leaf_lock);
808 +               dx_release(frames);
809 +               goto repeat;
810 +       }
811 +       entries = frame->entries;
812 +       at = frame->at;
813 +
814         /* Block full, should compress but for now just split */
815         dxtrace(printk("using %u of %u node entries\n",
816                        dx_get_count(entries), dx_get_limit(entries)));
817 @@ -1384,7 +1632,8 @@ static int ext3_dx_add_entry(handle_t *h
818                 struct dx_entry *entries2;
819                 struct dx_node *node2;
820                 struct buffer_head *bh2;
821 -
822 +               void *nb_lock;
823 +               
824                 if (levels && (dx_get_count(frames->entries) ==
825                                dx_get_limit(frames->entries))) {
826                         ext3_warning(sb, __FUNCTION__,
827 @@ -1395,6 +1644,7 @@ static int ext3_dx_add_entry(handle_t *h
828                 bh2 = ext3_append (handle, dir, &newblock, &err);
829                 if (!(bh2))
830                         goto cleanup;
831 +               nb_lock = ext3_lock_htree(dir, newblock, 1);
832                 node2 = (struct dx_node *)(bh2->b_data);
833                 entries2 = node2->entries;
834                 node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
835 @@ -1406,27 +1656,73 @@ static int ext3_dx_add_entry(handle_t *h
836                 if (levels) {
837                         unsigned icount1 = icount/2, icount2 = icount - icount1;
838                         unsigned hash2 = dx_get_hash(entries + icount1);
839 +                       void *ri_lock;
840 +
841 +                       /* we have to protect root htree index against
842 +                        * another dx_add_entry() which would want to
843 +                        * split it too -bzzz */
844 +                       ri_lock = ext3_lock_htree(dir, 0, 1);
845 +
846 +                       /* now that the root index block is locked, we must
847 +                        * search again for the position of our 2nd index -bzzz */
848 +                       dx_lock_bh(frame->bh);
849 +                       frames->at = dx_find_position(frames->entries, hinfo.hash);
850 +                       dx_unlock_bh(frame->bh);
851 +                       
852                         dxtrace(printk("Split index %i/%i\n", icount1, icount2));
853 -                               
854 -                       BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
855 +       
856 +                       BUFFER_TRACE(frame->bh, "get_write_access");
857                         err = ext3_journal_get_write_access(handle,
858                                                              frames[0].bh);
859                         if (err)
860                                 goto journal_error;
861 -                               
862 +                       
863 +                       /* copy index into new one */
864                         memcpy ((char *) entries2, (char *) (entries + icount1),
865                                 icount2 * sizeof(struct dx_entry));
866 -                       dx_set_count (entries, icount1);
867                         dx_set_count (entries2, icount2);
868                         dx_set_limit (entries2, dx_node_limit(dir));
869  
870                         /* Which index block gets the new entry? */
871                         if (at - entries >= icount1) {
872 +                               /* unlock index we won't use */
873 +                               ext3_unlock_htree(dir, idx_lock);
874 +                               idx_lock = nb_lock;
875                                 frame->at = at = at - entries - icount1 + entries2;
876 -                               frame->entries = entries = entries2;
877 +                               frame->entries = entries2;
878 +                               frame->curidx = curidx = newblock;
879                                 swap(frame->bh, bh2);
880 +                       } else {
881 +                               /* we'll use old index,so new one may be freed */
882 +                               ext3_unlock_htree(dir, nb_lock);
883                         }
884 -                       dx_insert_block (frames + 0, hash2, newblock);
885 +               
886 +                       /* NOTE: very subtle piece of code
887 +                        * competing dx_probe() may find 2nd level index in root
888 +                        * index, then we insert new index here and set new count
889 +                        * in that 2nd level index. so, dx_probe() may see 2nd
890 +                        * level index w/o hash it looks for. the solution is
891 +                        * to check root index after we locked the just-found 2nd
892 +                        * level index -bzzz */
893 +                       dx_lock_bh(frames[0].bh);
894 +                       dx_insert_block (dir, frames + 0, hash2, newblock, 0);
895 +                       dx_unlock_bh(frames[0].bh);
896 +                       
897 +                       /* now old and new 2nd level index blocks contain
898 +                        * all pointers, so dx_probe() may find it in both.
899 +                        * it's OK -bzzz */
900 +                       
901 +                       dx_lock_bh(frame->bh);
902 +                       dx_set_count(entries, icount1);
903 +                       dx_unlock_bh(frame->bh);
904 +
905 +                       /* now old 2nd level index block points to first half
906 +                        * of leaves. it's important that dx_probe() must
907 +                        * check root index block for changes under
908 +                        * dx_lock_bh(frame->bh) -bzzz */
909 +
910 +                       ext3_unlock_htree(dir, ri_lock);
911 +               
912                         dxtrace(dx_show_index ("node", frames[1].entries));
913                         dxtrace(dx_show_index ("node",
914                                ((struct dx_node *) bh2->b_data)->entries));
915 @@ -1435,38 +1731,61 @@ static int ext3_dx_add_entry(handle_t *h
916                                 goto journal_error;
917                         brelse (bh2);
918                 } else {
919 +                       unsigned long leaf = frame->leaf;
920 +
921                         dxtrace(printk("Creating second level index...\n"));
922                         memcpy((char *) entries2, (char *) entries,
923                                icount * sizeof(struct dx_entry));
924                         dx_set_limit(entries2, dx_node_limit(dir));
925  
926                         /* Set up root */
927 +                       dx_lock_bh(frames[0].bh);
928                         dx_set_count(entries, 1);
929                         dx_set_block(entries + 0, newblock);
930                         ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
931 +                       dx_unlock_bh(frames[0].bh);
932  
933                         /* Add new access path frame */
934                         frame = frames + 1;
935                         frame->at = at = at - entries + entries2;
936                         frame->entries = entries = entries2;
937                         frame->bh = bh2;
938 +                       frame->curidx = newblock;
939 +                       frame->leaf = leaf;
940                         err = ext3_journal_get_write_access(handle,
941                                                              frame->bh);
942                         if (err)
943                                 goto journal_error;
944 +
945 +                       /* first level index was root. it's already initialized */
946 +                       /* we may unlock it now */
947 +                       ext3_unlock_htree(dir, idx_lock);
948 +
949 +                       /* current index is just created 2nd level index */
950 +                       curidx = newblock;
951 +                       idx_lock = nb_lock;
952                 }
953                 ext3_journal_dirty_metadata(handle, frames[0].bh);
954         }
955 -       de = do_split(handle, dir, &bh, frame, &hinfo, &err);
956 +       de = do_split(handle, dir, &bh, frame, &hinfo, &newleaf_lock, &err);
957         if (!de)
958                 goto cleanup;
959 +
960 +       /* index has been split */
961 +       ext3_unlock_htree(dir, idx_lock);
962 +       
963         err = add_dirent_to_buf(handle, dentry, inode, de, bh);
964 +
965 +       if (newleaf_lock)
966 +               ext3_unlock_htree(dir, newleaf_lock);
967 +       
968         bh = 0;
969         goto cleanup;
970         
971  journal_error:
972         ext3_std_error(dir->i_sb, err);
973  cleanup:
974 +       ext3_unlock_htree(dir, leaf_lock);
975         if (bh)
976                 brelse(bh);
977         dx_release(frames);
978 @@ -1899,6 +2218,7 @@ static int ext3_rmdir (struct inode * di
979         struct buffer_head * bh;
980         struct ext3_dir_entry_2 * de;
981         handle_t *handle;
982 +       void *lock;
983  
984         handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
985         if (IS_ERR(handle)) {
986 @@ -1906,7 +2226,7 @@ static int ext3_rmdir (struct inode * di
987         }
988  
989         retval = -ENOENT;
990 -       bh = ext3_find_entry (dentry, &de);
991 +       bh = ext3_find_entry (dentry, &de, 1, &lock);
992         if (!bh)
993                 goto end_rmdir;
994  
995 @@ -1917,14 +2237,19 @@ static int ext3_rmdir (struct inode * di
996         DQUOT_INIT(inode);
997  
998         retval = -EIO;
999 -       if (le32_to_cpu(de->inode) != inode->i_ino)
1000 +       if (le32_to_cpu(de->inode) != inode->i_ino) {
1001 +               ext3_unlock_htree(dir, lock);
1002                 goto end_rmdir;
1003 +       }
1004  
1005         retval = -ENOTEMPTY;
1006 -       if (!empty_dir (inode))
1007 +       if (!empty_dir (inode)) {
1008 +               ext3_unlock_htree(dir, lock);
1009                 goto end_rmdir;
1010 +       }
1011  
1012         retval = ext3_delete_entry(handle, dir, de, bh);
1013 +       ext3_unlock_htree(dir, lock);
1014         if (retval)
1015                 goto end_rmdir;
1016         if (inode->i_nlink != 2)
1017 @@ -1957,6 +2282,7 @@ static int ext3_unlink(struct inode * di
1018         struct buffer_head * bh;
1019         struct ext3_dir_entry_2 * de;
1020         handle_t *handle;
1021 +       void *lock;
1022  
1023         handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
1024         if (IS_ERR(handle)) {
1025 @@ -1967,7 +2293,7 @@ static int ext3_unlink(struct inode * di
1026                 handle->h_sync = 1;
1027  
1028         retval = -ENOENT;
1029 -       bh = ext3_find_entry (dentry, &de);
1030 +       bh = ext3_find_entry (dentry, &de, 1, &lock);
1031         if (!bh)
1032                 goto end_unlink;
1033  
1034 @@ -1975,8 +2301,10 @@ static int ext3_unlink(struct inode * di
1035         DQUOT_INIT(inode);
1036  
1037         retval = -EIO;
1038 -       if (le32_to_cpu(de->inode) != inode->i_ino)
1039 +       if (le32_to_cpu(de->inode) != inode->i_ino) {
1040 +               ext3_unlock_htree(dir, lock);
1041                 goto end_unlink;
1042 +       }
1043         
1044         if (!inode->i_nlink) {
1045                 ext3_warning (inode->i_sb, "ext3_unlink",
1046 @@ -1985,6 +2313,7 @@ static int ext3_unlink(struct inode * di
1047                 inode->i_nlink = 1;
1048         }
1049         retval = ext3_delete_entry(handle, dir, de, bh);
1050 +       ext3_unlock_htree(dir, lock);
1051         if (retval)
1052                 goto end_unlink;
1053         dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1054 @@ -2106,6 +2435,7 @@ static int ext3_rename (struct inode * o
1055         struct buffer_head * old_bh, * new_bh, * dir_bh;
1056         struct ext3_dir_entry_2 * old_de, * new_de;
1057         int retval;
1058 +       void *lock1 = NULL, *lock2 = NULL, *lock3 = NULL;
1059  
1060         old_bh = new_bh = dir_bh = NULL;
1061  
1062 @@ -2118,7 +2448,10 @@ static int ext3_rename (struct inode * o
1063         if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
1064                 handle->h_sync = 1;
1065  
1066 -       old_bh = ext3_find_entry (old_dentry, &old_de);
1067 +       if (old_dentry->d_parent == new_dentry->d_parent)
1068 +               down(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
1069 +
1070 +       old_bh = ext3_find_entry (old_dentry, &old_de, 1, &lock1 /* FIXME */);
1071         /*
1072          *  Check for inode number is _not_ due to possible IO errors.
1073          *  We might rmdir the source, keep it as pwd of some process
1074 @@ -2131,7 +2464,7 @@ static int ext3_rename (struct inode * o
1075                 goto end_rename;
1076  
1077         new_inode = new_dentry->d_inode;
1078 -       new_bh = ext3_find_entry (new_dentry, &new_de);
1079 +       new_bh = ext3_find_entry (new_dentry, &new_de, 1, &lock2 /* FIXME */);
1080         if (new_bh) {
1081                 if (!new_inode) {
1082                         brelse (new_bh);
1083 @@ -2194,7 +2527,7 @@ static int ext3_rename (struct inode * o
1084                 struct buffer_head *old_bh2;
1085                 struct ext3_dir_entry_2 *old_de2;
1086                 
1087 -               old_bh2 = ext3_find_entry(old_dentry, &old_de2);
1088 +               old_bh2 = ext3_find_entry(old_dentry, &old_de2, 1, &lock3 /* FIXME */);
1089                 if (old_bh2) {
1090                         retval = ext3_delete_entry(handle, old_dir,
1091                                                    old_de2, old_bh2);
1092 @@ -2237,6 +2570,14 @@ static int ext3_rename (struct inode * o
1093         retval = 0;
1094  
1095  end_rename:
1096 +       if (lock1)
1097 +               ext3_unlock_htree(old_dentry->d_parent->d_inode, lock1);
1098 +       if (lock2)
1099 +               ext3_unlock_htree(new_dentry->d_parent->d_inode, lock2);
1100 +       if (lock3)
1101 +               ext3_unlock_htree(old_dentry->d_parent->d_inode, lock3);
1102 +       if (old_dentry->d_parent == new_dentry->d_parent)
1103 +               up(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
1104         brelse (dir_bh);
1105         brelse (old_bh);
1106         brelse (new_bh);
1107 @@ -2245,6 +2586,29 @@ end_rename:
1108  }
1109  
1110  /*
1111 + * these locking primitives are used to protect parts
1112 + * of dir's htree. protection unit is a block: leaf or index
1113 + */
1114 +static inline void *ext3_lock_htree(struct inode *dir,
1115 +                                       unsigned long value, int rwlock)
1116 +{
1117 +       void *lock;
1118 +       
1119 +       if (!test_opt(dir->i_sb, PDIROPS))
1120 +               return NULL;
1121 +       lock = dynlock_lock(&EXT3_I(dir)->i_htree_lock, value, 1, GFP_KERNEL);
1122 +       return lock;
1123 +}
1124 +
1125 +static inline void ext3_unlock_htree(struct inode *dir,
1126 +                                       void *lock)
1127 +{
1128 +       if (!test_opt(dir->i_sb, PDIROPS) || !lock)
1129 +               return;
1130 +       dynlock_unlock(&EXT3_I(dir)->i_htree_lock, lock);
1131 +}
1132 +
1133 +/*
1134   * directories can handle most operations...
1135   */
1136  struct inode_operations ext3_dir_inode_operations = {
1137 --- linux-2.4.18/fs/ext3/super.c~ext3-pdirops-2.4.18-chaos      2003-09-01 16:33:25.000000000 +0400
1138 +++ linux-2.4.18-alexey/fs/ext3/super.c 2003-09-02 12:46:29.000000000 +0400
1139 @@ -786,6 +786,8 @@ static int parse_options (char * options
1140                                 return 0;
1141                         }
1142                 }
1143 +               else if (!strcmp (this_char, "pdirops"))
1144 +                       set_opt (sbi->s_mount_opt, PDIROPS);
1145                 else if (!strcmp (this_char, "grpid") ||
1146                          !strcmp (this_char, "bsdgroups"))
1147                         set_opt (*mount_options, GRPID);
1148 @@ -812,6 +814,9 @@ static int parse_options (char * options
1149                         if (want_numeric(value, "sb", sb_block))
1150                                 return 0;
1151                 }
1152 +               else if (!strcmp (this_char, "pdirops")) {
1153 +                       set_opt (sbi->s_mount_opt, PDIROPS);
1154 +               }
1155  #ifdef CONFIG_JBD_DEBUG
1156                 else if (!strcmp (this_char, "ro-after")) {
1157                         unsigned long v;
1158 @@ -969,6 +974,10 @@ static int ext3_setup_super(struct super
1159                 ext3_check_inodes_bitmap (sb);
1160         }
1161  #endif
1162 +#ifdef S_PDIROPS
1163 +       if (test_opt (sb, PDIROPS))
1164 +               sb->s_flags |= S_PDIROPS;
1165 +#endif
1166         setup_ro_after(sb);
1167         return res;
1168  }
1169 @@ -1463,6 +1472,11 @@ struct super_block * ext3_read_super (st
1170                 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
1171                 "writeback");
1172  
1173 +       if (test_opt(sb, PDIROPS)) {
1174 +               printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n");
1175 +               sb->s_flags |= S_PDIROPS;
1176 +       }
1177 +               
1178         return sb;
1179  
1180  failed_mount3:
1181 --- linux-2.4.18/include/linux/ext3_fs.h~ext3-pdirops-2.4.18-chaos      2003-09-01 14:58:06.000000000 +0400
1182 +++ linux-2.4.18-alexey/include/linux/ext3_fs.h 2003-09-02 11:46:15.000000000 +0400
1183 @@ -310,6 +310,7 @@ struct ext3_inode {
1184  /*
1185   * Mount flags
1186   */
1187 +#define EXT3_MOUNT_PDIROPS             0x800000/* Parallel dir operations */
1188  #define EXT3_MOUNT_CHECK               0x0001  /* Do mount-time checks */
1189  #define EXT3_MOUNT_GRPID               0x0004  /* Create files with directory's group */
1190  #define EXT3_MOUNT_DEBUG               0x0008  /* Some debugging messages */
1191 --- linux-2.4.18/include/linux/ext3_fs_i.h~ext3-pdirops-2.4.18-chaos    2003-08-29 11:57:30.000000000 +0400
1192 +++ linux-2.4.18-alexey/include/linux/ext3_fs_i.h       2003-09-02 11:46:15.000000000 +0400
1193 @@ -17,6 +17,7 @@
1194  #define _LINUX_EXT3_FS_I
1195  
1196  #include <linux/rwsem.h>
1197 +#include <linux/dynlocks.h>
1198  
1199  /*
1200   * second extended file system inode data in memory
1201 @@ -73,6 +74,11 @@ struct ext3_inode_info {
1202          * by other means, so we have truncate_sem.
1203          */
1204         struct rw_semaphore truncate_sem;
1205 +
1206 +       /* following fields for parallel directory operations -bzzz */
1207 +       struct dynlock i_htree_lock;
1208 +       struct semaphore i_append_sem;
1209 +       struct semaphore i_rename_sem;
1210  };
1211  
1212  #endif /* _LINUX_EXT3_FS_I */
1213 --- linux-2.4.18/fs/ext3/inode.c~ext3-pdirops-2.4.18-chaos      2003-09-01 16:33:25.000000000 +0400
1214 +++ linux-2.4.18-alexey/fs/ext3/inode.c 2003-09-02 11:46:15.000000000 +0400
1215 @@ -2454,6 +2454,9 @@ void ext3_read_inode(struct inode * inod
1216         } else if (S_ISDIR(inode->i_mode)) {
1217                 inode->i_op = &ext3_dir_inode_operations;
1218                 inode->i_fop = &ext3_dir_operations;
1219 +               dynlock_init(&EXT3_I(inode)->i_htree_lock);
1220 +               sema_init(&EXT3_I(inode)->i_rename_sem, 1);
1221 +               sema_init(&EXT3_I(inode)->i_append_sem, 1);
1222         } else if (S_ISLNK(inode->i_mode)) {
1223                 if (ext3_inode_is_fast_symlink(inode))
1224                         inode->i_op = &ext3_fast_symlink_inode_operations;
1225 --- linux-2.4.18/fs/ext3/ialloc.c~ext3-pdirops-2.4.18-chaos     2003-09-01 14:58:05.000000000 +0400
1226 +++ linux-2.4.18-alexey/fs/ext3/ialloc.c        2003-09-02 11:46:15.000000000 +0400
1227 @@ -601,6 +601,9 @@ repeat:
1228                 return ERR_PTR(-EDQUOT);
1229         }
1230         ext3_debug ("allocating inode %lu\n", inode->i_ino);
1231 +       dynlock_init(&EXT3_I(inode)->i_htree_lock);
1232 +       sema_init(&EXT3_I(inode)->i_rename_sem, 1);
1233 +       sema_init(&EXT3_I(inode)->i_append_sem, 1);
1234         return inode;
1235  
1236  fail:
1237
1238 _