Whamcloud - gitweb
b=16893
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / iopen-2.6.18-rhel5-ext4.patch
1 Index: linux-2.6.18-128.1.6/fs/ext4/iopen.c
2 ===================================================================
3 --- /dev/null
4 +++ linux-2.6.18-128.1.6/fs/ext4/iopen.c
5 @@ -0,0 +1,295 @@
6 +/*
7 + * linux/fs/ext4/iopen.c
8 + *
9 + * Special support for open by inode number
10 + *
11 + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
12 + *
13 + * This file may be redistributed under the terms of the GNU General
14 + * Public License.
15 + *
16 + *
17 + * Invariants:
18 + *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
19 + *     for an inode at one time.
20 + *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
21 + *     aliases on an inode at the same time.
22 + *
23 + * If we have any connected dentry aliases for an inode, use one of those
24 + * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
25 + * dentry for this inode, which thereafter will be found by the dcache
26 + * when looking up this inode number in __iopen__, so we don't return here
27 + * until it is gone.
28 + *
29 + * If we get an inode via a regular name lookup, then we "rename" the
30 + * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
31 + * existing users of the disconnected dentry will continue to use the same
32 + * dentry as the connected users, and there will never be both kinds of
33 + * dentry aliases at one time.
34 + */
35 +
36 +#include <linux/sched.h>
37 +#include <linux/fs.h>
38 +#include <linux/smp_lock.h>
39 +#include <linux/dcache.h>
40 +#include <linux/security.h>
41 +#include "iopen.h"
42 +#include "ext4.h"
43 +#include "ext4_jbd2.h"
44 +
45 +#ifndef assert
46 +#define assert(test) J_ASSERT(test)
47 +#endif
48 +
49 +#define IOPEN_NAME_LEN 32
50 +
51 +/* Lookup in the iopen directory: the name is an inode number ("." maps to
52 + * the directory itself, ".." to the root inode).  Returns an existing alias
53 + * of the inode, NULL once the inode is attached to @dentry, or an ERR_PTR(). */
54 +static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry,
55 +                                  struct nameidata *nd)
56 +{
57 +       struct inode *inode;
58 +       unsigned long ino;
59 +       struct list_head *lp;
60 +       struct dentry *alternate;
61 +       char buf[IOPEN_NAME_LEN];
62 +
63 +       if (dentry->d_name.len >= IOPEN_NAME_LEN)
64 +               return ERR_PTR(-ENAMETOOLONG);
65 +
66 +       memcpy(buf, dentry->d_name.name, dentry->d_name.len);
67 +       buf[dentry->d_name.len] = 0;    /* NUL-terminate the private copy */
68 +
69 +       if (strcmp(buf, ".") == 0)
70 +               ino = dir->i_ino;
71 +       else if (strcmp(buf, "..") == 0)
72 +               ino = EXT4_ROOT_INO;
73 +       else
74 +               ino = simple_strtoul(buf, 0, 0); /* NOTE(review): no endp, trailing junk unchecked */
75 +
76 +       if ((ino != EXT4_ROOT_INO &&
77 +            ino < EXT4_FIRST_INO(dir->i_sb)) ||
78 +           ino > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))
79 +               return ERR_PTR(-ENOENT); /* below first ino (root excepted) or past the count */
80 +
81 +       inode = ext4_iget(dir->i_sb, ino);
82 +       if (IS_ERR(inode)) {
83 +                /* Newer kernels return -ESTALE for inodes that are not in use,
84 +                 * but older kernels return a negative dentry. This can only
85 +                 * happen when doing a lookup in the __iopen__ dir, because the
86 +                 * "entry" will always be found even if inode is unallocated.
87 +                 * Handle this here instead of fixing the callers. b=19114 */
88 +               if (PTR_ERR(inode) == -ESTALE)
89 +                       return (ERR_PTR(-ENOENT));
90 +               return ERR_CAST(inode);
91 +       }
92 +
93 +       assert(list_empty(&dentry->d_alias));           /* d_instantiate */
94 +       assert(d_unhashed(dentry));                     /* d_rehash */
95 +
96 +       /* preferably return a connected dentry; no alias may be disconnected */
97 +       spin_lock(&dcache_lock);
98 +       list_for_each(lp, &inode->i_dentry) {
99 +               alternate = list_entry(lp, struct dentry, d_alias);
100 +               assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
101 +       }
102 +
103 +       if (!list_empty(&inode->i_dentry)) {
104 +               alternate = list_entry(inode->i_dentry.next,
105 +                                      struct dentry, d_alias);
106 +               dget_locked(alternate);         /* reference returned to the caller */
107 +               spin_lock(&alternate->d_lock);
108 +               alternate->d_flags |= DCACHE_REFERENCED;
109 +               spin_unlock(&alternate->d_lock);
110 +               iput(inode);                    /* drop the ext4_iget() reference */
111 +               spin_unlock(&dcache_lock);
112 +               return alternate;
113 +       }
114 +       dentry->d_flags |= DCACHE_DISCONNECTED;
115 +
116 +       /* d_add(), but don't drop dcache_lock before adding dentry to inode */
117 +       list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
118 +       dentry->d_inode = inode;
119 +
120 +       d_rehash_cond(dentry, 0); /* NOTE(review): 0 presumably means dcache_lock is held */
121 +       spin_unlock(&dcache_lock);
122 +
123 +       return NULL;
124 +}
125 +
126 +/* Spliced into ext4_lookup() and ext4_add_link().  If @inode has a disconnected
127 + * alias, rename it to @dentry's name and parent and return it with a reference;
128 + * else attach @inode, if any, to @dentry, hash it if @rehash, and return NULL. */
129 +struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode,
130 +                                   int rehash)
131 +{
132 +       struct dentry *tmp, *goal = NULL;
133 +       struct list_head *lp;
134 +
135 +       /* verify this dentry is really new */
136 +       assert(dentry->d_inode == NULL);
137 +       assert(list_empty(&dentry->d_alias));           /* d_instantiate */
138 +       if (rehash)
139 +               assert(d_unhashed(dentry));             /* d_rehash */
140 +       assert(list_empty(&dentry->d_subdirs));
141 +
142 +       spin_lock(&dcache_lock);
143 +       if (!inode)
144 +               goto do_rehash;                 /* no inode: only hash the dentry */
145 +
146 +       if (!test_opt(inode->i_sb, IOPEN))
147 +               goto do_instantiate;            /* iopen not enabled on this mount */
148 +
149 +       /* look for the (single) disconnected alias of this inode */
150 +       list_for_each(lp, &inode->i_dentry) {
151 +               tmp = list_entry(lp, struct dentry, d_alias);
152 +               if (tmp->d_flags & DCACHE_DISCONNECTED) {
153 +                       assert(tmp->d_alias.next == &inode->i_dentry);
154 +                       assert(tmp->d_alias.prev == &inode->i_dentry);
155 +                       goal = tmp;
156 +                       dget_locked(goal);      /* reference returned to the caller */
157 +                       break;
158 +               }
159 +       }
160 +
161 +       if (!goal)
162 +               goto do_instantiate;
163 +
164 +       /* Connect the goal: move it to the hash queue and name of dentry */
165 +       goal->d_flags &= ~DCACHE_DISCONNECTED;
166 +       security_d_instantiate(goal, inode);
167 +       __d_drop(dentry);
168 +       d_rehash_cond(dentry, 0);       /* NOTE(review): done regardless of @rehash */
169 +       d_move_locked(goal, dentry);
170 +       spin_unlock(&dcache_lock);
171 +       iput(inode);    /* dentry was not instantiated: drop the reference passed in */
172 +
173 +       return goal;
174 +
175 +       /* d_add(), but don't drop dcache_lock before adding dentry to inode */
176 +do_instantiate:
177 +       list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
178 +       dentry->d_inode = inode;
179 +do_rehash:
180 +       if (rehash)
181 +               d_rehash_cond(dentry, 0);
182 +       spin_unlock(&dcache_lock);
183 +
184 +       return NULL;
185 +}
186 +
187 +/*
188 + * Similar to d_instantiate(), except that it first drops and detaches
189 + * the disconnected alias of @inode, if there is one.
190 + */
191 +void iopen_d_instantiate(struct dentry *dentry, struct inode * inode)
192 +{
193 +       struct dentry *dis_dentry;
194 +
195 +       /* verify this dentry is really new */
196 +       assert(dentry->d_inode == NULL);
197 +       assert(list_empty(&dentry->d_alias));
198 +
199 +       spin_lock(&dcache_lock);
200 +       if (!inode || !test_opt(inode->i_sb, IOPEN) ||
201 +           list_empty(&inode->i_dentry))
202 +               goto do_instantiate;
203 +
204 +       /* a disconnected dentry has been added behind our back;
205 +        * we have to drop that dentry, see bug 16362/15713 */
206 +       dis_dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
207 +       spin_lock(&dis_dentry->d_lock);
208 +       assert(dis_dentry->d_alias.next == &inode->i_dentry);
209 +       assert(dis_dentry->d_alias.prev == &inode->i_dentry);
210 +       assert(dis_dentry->d_flags & DCACHE_DISCONNECTED);
211 +       __d_drop(dis_dentry);
212 +       list_del_init(&dis_dentry->d_alias);
213 +       spin_unlock(&dis_dentry->d_lock);
214 +
215 +do_instantiate:
216 +       if (inode)
217 +               list_add(&dentry->d_alias, &inode->i_dentry);
218 +       dentry->d_inode = inode;
219 +       spin_unlock(&dcache_lock);
220 +       security_d_instantiate(dentry, inode);
221 +}
222 +
223 +/*
224 + * These are the special structures for the iopen pseudo directory.
225 + */
226 +
227 +static struct inode_operations iopen_inode_operations = {
228 +       .lookup         = iopen_lookup,         /* by inode number; BKL held */
229 +};
230 +
231 +static struct file_operations iopen_file_operations = {
232 +       .read           = generic_read_dir,     /* the only file method provided */
233 +};
234 +
235 +static int match_dentry(struct dentry *dentry, const char *name)
236 +{
237 +       int     name_len = strlen(name);
238 +
239 +       /* Returns 1 if the dentry's name is exactly @name, 0 otherwise:
240 +        * the lengths must agree before the bytes are compared. */
241 +       if (dentry->d_name.len == name_len &&
242 +           strncmp(dentry->d_name.name, name, name_len) == 0)
243 +               return 1;
244 +       return 0;
245 +}
246 +
247 +/*
248 + * Spliced into ext4_lookup: returns 1 if the name being looked up in the
249 + * root directory is __iopen__ (dentry is then filled in), and 0 otherwise.
250 + */
251 +int ext4_check_for_iopen(struct inode *dir, struct dentry *dentry)
252 +{
253 +       struct inode *iopen_inode;
254 +
255 +       /* only __iopen__ in the root of an iopen-enabled mount qualifies */
256 +       if (dir->i_ino != EXT4_ROOT_INO)
257 +               return 0;
258 +       if (!test_opt(dir->i_sb, IOPEN) || !match_dentry(dentry, "__iopen__"))
259 +               return 0;
260 +
261 +       iopen_inode = ext4_iget(dir->i_sb, EXT4_BAD_INO);
262 +       if (!IS_ERR(iopen_inode))
263 +               d_add(dentry, iopen_inode);
264 +
265 +       return !IS_ERR(iopen_inode);
266 +}
267 +
268 +/*
269 + * This function is spliced into ext4_iget(); it returns 1 if the inode
270 + * number is the one for /__iopen__, in which case the inode is filled
271 + * in appropriately.  Otherwise, this function returns 0.
272 + */
273 +int ext4_iopen_get_inode(struct inode *inode)
274 +{
275 +       if (inode->i_ino != EXT4_BAD_INO)       /* number used for /__iopen__ */
276 +               return 0;
277 +
278 +       inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;    /* dr-x------ */
279 +       if (test_opt(inode->i_sb, IOPEN_NOPRIV))
280 +               inode->i_mode |= 0777;                  /* open to everyone */
281 +       inode->i_uid = 0;
282 +       inode->i_gid = 0;
283 +       inode->i_nlink = 1;
284 +       inode->i_size = 4096;
285 +       inode->i_atime = inode->i_ctime = inode->i_mtime = ext4_current_time(inode);
286 +       EXT4_I(inode)->i_dtime = 0;
287 +       EXT4_I(inode)->i_file_acl = 0;
288 +       inode->i_blocks = 0;
289 +       inode->i_version = 1;
290 +       inode->i_generation = 0;
291 +
292 +       inode->i_op = &iopen_inode_operations;
293 +       inode->i_fop = &iopen_file_operations;
294 +       inode->i_mapping->a_ops = NULL;         /* no address space operations */
295 +
296 +       if (inode->i_state & I_NEW)     /* ext4_iget() returns right after us */
297 +               unlock_new_inode(inode);
298 +
299 +       return 1;
300 +}
301 Index: linux-2.6.18-128.1.6/fs/ext4/iopen.h
302 ===================================================================
303 --- /dev/null
304 +++ linux-2.6.18-128.1.6/fs/ext4/iopen.h
305 @@ -0,0 +1,16 @@
306 +/*
307 + * iopen.h
308 + *
309 + * Special support for opening files by inode number.
310 + *
311 + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
312 + *
313 + * This file may be redistributed under the terms of the GNU General
314 + * Public License.
315 + */
316 +
317 +extern int ext4_check_for_iopen(struct inode *dir, struct dentry *dentry);
318 +extern int ext4_iopen_get_inode(struct inode *inode);
319 +extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
320 +                                          struct inode *inode, int rehash);
321 +extern void iopen_d_instantiate(struct dentry *dentry, struct inode * inode);
322 Index: linux-2.6.18-128.1.6/fs/ext4/inode.c
323 ===================================================================
324 --- linux-2.6.18-128.1.6.orig/fs/ext4/inode.c
325 +++ linux-2.6.18-128.1.6/fs/ext4/inode.c
326 @@ -37,6 +37,7 @@
327  #include <linux/bio.h>
328  #include "ext4_jbd2.h"
329  #include "xattr.h"
330 +#include "iopen.h"
331  #include "acl.h"
332  
333  /*
334 @@ -2764,6 +2765,8 @@ struct inode *ext4_iget(struct super_blo
335         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
336  #endif
337         ei->i_block_alloc_info = NULL;
338 +       if (ext4_iopen_get_inode(inode))
339 +               return inode;
340  
341         ret = __ext4_get_inode_loc(inode, &iloc, 0);
342         if (ret < 0)
343 Index: linux-2.6.18-128.1.6/fs/ext4/super.c
344 ===================================================================
345 --- linux-2.6.18-128.1.6.orig/fs/ext4/super.c
346 +++ linux-2.6.18-128.1.6/fs/ext4/super.c
347 @@ -888,6 +888,7 @@ enum {
348         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
349         Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
350         Opt_mballoc, Opt_nomballoc, Opt_stripe,
351 +       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
352  };
353  
354  static match_table_t tokens = {
355 @@ -938,6 +939,9 @@ static match_table_t tokens = {
356         {Opt_noquota, "noquota"},
357         {Opt_quota, "quota"},
358         {Opt_usrquota, "usrquota"},
359 +       {Opt_iopen, "iopen"},
360 +       {Opt_noiopen, "noiopen"},
361 +       {Opt_iopen_nopriv, "iopen_nopriv"},
362         {Opt_barrier, "barrier=%u"},
363         {Opt_extents, "extents"},
364         {Opt_noextents, "noextents"},
365 @@ -1270,6 +1274,18 @@ clear_qf_name:
366                         else
367                                 clear_opt(sbi->s_mount_opt, BARRIER);
368                         break;
369 +               case Opt_iopen:
370 +                       set_opt (sbi->s_mount_opt, IOPEN);
371 +                       clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
372 +                       break;
373 +               case Opt_noiopen:
374 +                       clear_opt (sbi->s_mount_opt, IOPEN);
375 +                       clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
376 +                       break;
377 +               case Opt_iopen_nopriv:
378 +                       set_opt (sbi->s_mount_opt, IOPEN);
379 +                       set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
380 +                       break;
381                 case Opt_ignore:
382                         break;
383                 case Opt_resize:
384 Index: linux-2.6.18-128.1.6/fs/ext4/namei.c
385 ===================================================================
386 --- linux-2.6.18-128.1.6.orig/fs/ext4/namei.c
387 +++ linux-2.6.18-128.1.6/fs/ext4/namei.c
388 @@ -39,6 +39,7 @@
389  
390  #include "namei.h"
391  #include "xattr.h"
392 +#include "iopen.h"
393  #include "acl.h"
394  
395  /*
396 @@ -1048,6 +1049,9 @@ static struct dentry *ext4_lookup(struct
397         if (dentry->d_name.len > EXT4_NAME_LEN)
398                 return ERR_PTR(-ENAMETOOLONG);
399  
400 +       if (ext4_check_for_iopen(dir, dentry))
401 +               return NULL;
402 +
403         bh = ext4_find_entry(dentry, &de);
404         inode = NULL;
405         if (bh) {
406 @@ -1062,7 +1066,8 @@ static struct dentry *ext4_lookup(struct
407                 if (IS_ERR(inode))
408                         return ERR_CAST(inode);
409         }
410 -       return d_splice_alias(inode, dentry);
411 +
412 +       return iopen_connect_dentry(dentry, inode, 1);
413  }
414  
415  
416 @@ -1709,7 +1714,7 @@ static int ext4_add_nondir(handle_t *han
417         int err = ext4_add_entry(handle, dentry, inode);
418         if (!err) {
419                 ext4_mark_inode_dirty(handle, inode);
420 -               d_instantiate(dentry, inode);
421 +               iopen_d_instantiate(dentry, inode);
422                 return 0;
423         }
424         drop_nlink(inode);
425 @@ -1868,7 +1873,7 @@ out_clear_inode:
426         ext4_inc_count(handle, dir);
427         ext4_update_dx_flag(dir);
428         ext4_mark_inode_dirty(handle, dir);
429 -       d_instantiate(dentry, inode);
430 +       iopen_d_instantiate(dentry, inode);
431  out_stop:
432         ext4_journal_stop(handle);
433         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
434 @@ -2134,10 +2139,6 @@ static int ext4_rmdir (struct inode * di
435                               inode->i_nlink);
436         inode->i_version++;
437         clear_nlink(inode);
438 -       /* There's no need to set i_disksize: the fact that i_nlink is
439 -        * zero will ensure that the right thing happens during any
440 -        * recovery. */
441 -       inode->i_size = 0;
442         ext4_orphan_add(handle, inode);
443         inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
444         ext4_mark_inode_dirty(handle, inode);
445 @@ -2263,6 +2264,23 @@ out_stop:
446         return err;
447  }
448  
449 +/* Variant of ext4_add_nondir() that goes through iopen_connect_dentry() */
450 +static int ext4_add_link(handle_t *handle, struct dentry *dentry,
451 +                        struct inode *inode)
452 +{
453 +       int rc = ext4_add_entry(handle, dentry, inode);
454 +
455 +       if (rc == 0)
456 +               rc = ext4_mark_inode_dirty(handle, inode);
457 +       if (rc != 0) {
458 +               ext4_dec_count(handle, inode);  /* failed: back out */
459 +               iput(inode);
460 +               return rc;
461 +       }
462 +       dput(iopen_connect_dentry(dentry, inode, 0));
463 +       return 0;
464 +}
465 +
466  static int ext4_link (struct dentry * old_dentry,
467                 struct inode * dir, struct dentry *dentry)
468  {
469 @@ -2293,7 +2311,8 @@ retry:
470         ext4_inc_count(handle, inode);
471         atomic_inc(&inode->i_count);
472  
473 -       err = ext4_add_nondir(handle, dentry, inode);
474 +       err = ext4_add_link(handle, dentry, inode);
475 +       ext4_orphan_del(handle, inode);
476         ext4_journal_stop(handle);
477         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
478                 goto retry;
479 Index: linux-2.6.18-128.1.6/fs/ext4/Makefile
480 ===================================================================
481 --- linux-2.6.18-128.1.6.orig/fs/ext4/Makefile
482 +++ linux-2.6.18-128.1.6/fs/ext4/Makefile
483 @@ -4,7 +4,7 @@
484  
485  obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
486  
487 -ext4dev-y      := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
488 +ext4dev-y      := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
489                    ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
490                    ext4_jbd2.o migrate.o mballoc.o
491  
492 Index: linux-2.6.18-128.1.6/fs/ext4/ext4.h
493 ===================================================================
494 --- linux-2.6.18-128.1.6.orig/fs/ext4/ext4.h
495 +++ linux-2.6.18-128.1.6/fs/ext4/ext4.h
496 @@ -18,6 +18,7 @@
497  
498  #include <linux/types.h>
499  #include <linux/blkdev.h>
500 +#include <linux/jbd2.h>
501  #include "ext4_i.h"
502  
503  #define EXT4_SUPER_MAGIC       0xEF53
504 @@ -537,6 +538,8 @@ do {                                                                               \
505  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT        0x1000000 /* Journal Async Commit */
506  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
507  #define EXT4_MOUNT_MBALLOC             0x4000000 /* Buddy allocation support */
508 +#define EXT4_MOUNT_IOPEN               0x8000000 /* Allow access via iopen */
509 +#define EXT4_MOUNT_IOPEN_NOPRIV                0x10000000 /* Make iopen world-readable */
510  /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
511  #ifndef _LINUX_EXT2_FS_H
512  #define clear_opt(o, opt)              o &= ~EXT4_MOUNT_##opt