Whamcloud - gitweb
b=19674
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / iopen-sles11.patch
1 Index: linux-2.6.27.21-0.1/fs/ext4/iopen.c
2 ===================================================================
3 --- /dev/null
4 +++ linux-2.6.27.21-0.1/fs/ext4/iopen.c
5 @@ -0,0 +1,295 @@
6 +/*
7 + * linux/fs/ext4/iopen.c
8 + *
9 + * Special support for open by inode number
10 + *
11 + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
12 + *
13 + * This file may be redistributed under the terms of the GNU General
14 + * Public License.
15 + *
16 + *
17 + * Invariants:
18 + *   - there is only ever a single DCACHE_DISCONNECTED dentry alias
19 + *     for an inode at one time.
20 + *   - there are never both connected and DCACHE_DISCONNECTED dentry
21 + *     aliases on an inode at the same time.
22 + *
23 + * If we have any connected dentry aliases for an inode, use one of those
24 + * in iopen_lookup().  Otherwise, we instantiate a single DCACHE_DISCONNECTED
25 + * dentry for this inode, which thereafter will be found by the dcache
26 + * when looking up this inode number in __iopen__, so iopen_lookup() is not
27 + * called again for that inode number until the dentry is gone.
28 + *
29 + * If we get an inode via a regular name lookup, then we "rename" the
30 + * DCACHE_DISCONNECTED dentry to the proper name and parent.  This ensures
31 + * existing users of the disconnected dentry will continue to use the same
32 + * dentry as the connected users, and there will never be both kinds of
33 + * dentry aliases at one time.
34 + */
35 +
36 +#include <linux/sched.h>
37 +#include <linux/fs.h>
38 +#include <linux/smp_lock.h>
39 +#include <linux/dcache.h>
40 +#include <linux/security.h>
41 +#include "iopen.h"
42 +#include "ext4.h"
43 +#include "ext4_jbd2.h"
44 +
45 +#ifndef assert
46 +#define assert(test) J_ASSERT(test) /* assert() falls back to J_ASSERT() */
47 +#endif
48 +
49 +#define IOPEN_NAME_LEN 32 /* size of the name buffer, trailing NUL included */
50 +
51 +/* Lookup in the iopen pseudo directory: the entry name is parsed as an inode
52 + * number.  Returns a referenced existing alias of the inode if it has one,
53 + * NULL once @dentry itself has been bound to the inode, or an ERR_PTR(). */
54 +static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry,
55 +                                  struct nameidata *nd)
56 +{
57 +       struct inode *inode;
58 +       unsigned long ino;
59 +       struct list_head *lp;
60 +       struct dentry *alternate;
61 +       char buf[IOPEN_NAME_LEN];
62 +
63 +       if (dentry->d_name.len >= IOPEN_NAME_LEN) /* leave room for the NUL */
64 +               return ERR_PTR(-ENAMETOOLONG);
65 +
66 +       memcpy(buf, dentry->d_name.name, dentry->d_name.len);
67 +       buf[dentry->d_name.len] = 0;
68 +
69 +       if (strcmp(buf, ".") == 0)
70 +               ino = dir->i_ino;
71 +       else if (strcmp(buf, "..") == 0)
72 +               ino = EXT4_ROOT_INO;
73 +       else
74 +               ino = simple_strtoul(buf, 0, 0); /* base 0: radix from prefix */
75 +
76 +       if ((ino != EXT4_ROOT_INO &&
77 +            ino < EXT4_FIRST_INO(dir->i_sb)) ||
78 +           ino > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))
79 +               return ERR_PTR(-ENOENT); /* number outside the accepted range */
80 +
81 +       inode = ext4_iget(dir->i_sb, ino);
82 +       if (IS_ERR(inode)) {
83 +                /* Newer kernels return -ESTALE for inodes that are not in use,
84 +                 * but older kernels return a negative dentry. This can only
85 +                 * happen when doing a lookup in the __iopen__ dir, because the
86 +                 * "entry" will always be found even if inode is unallocated.
87 +                 * Handle this here instead of fixing the callers. b=19114 */
88 +               if (PTR_ERR(inode) == -ESTALE)
89 +                       return (ERR_PTR(-ENOENT));
90 +               return ERR_CAST(inode);
91 +       }
92 +
93 +       assert(list_empty(&dentry->d_alias));           /* d_instantiate */
94 +       assert(d_unhashed(dentry));                     /* d_rehash */
95 +
96 +       /* preferably return a connected dentry */
97 +       spin_lock(&dcache_lock);
98 +       list_for_each(lp, &inode->i_dentry) { /* no alias may be disconnected */
99 +               alternate = list_entry(lp, struct dentry, d_alias);
100 +               assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
101 +       }
102 +
103 +       if (!list_empty(&inode->i_dentry)) {
104 +               alternate = list_entry(inode->i_dentry.next,
105 +                                      struct dentry, d_alias);
106 +               dget_locked(alternate);
107 +               spin_lock(&alternate->d_lock);
108 +               alternate->d_flags |= DCACHE_REFERENCED;
109 +               spin_unlock(&alternate->d_lock);
110 +               iput(inode); /* drop the ext4_iget() reference */
111 +               spin_unlock(&dcache_lock);
112 +               return alternate;
113 +       }
114 +       dentry->d_flags |= DCACHE_DISCONNECTED;
115 +
116 +       /* d_add(), but don't drop dcache_lock before adding dentry to inode */
117 +       list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
118 +       dentry->d_inode = inode;
119 +
120 +       d_rehash_cond(dentry, 0);
121 +       spin_unlock(&dcache_lock);
122 +
123 +       return NULL; /* @dentry itself now refers to the inode */
124 +}
125 +
126 +/* Spliced into ext4_lookup() and ext4_add_link().  Binds @inode to @dentry,
127 + * hashing it if @rehash, and returns NULL; but if the inode has a disconnected
128 + * alias, that alias is moved to @dentry's name and returned with a reference. */
129 +struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode,
130 +                                   int rehash)
131 +{
132 +       struct dentry *tmp, *goal = NULL;
133 +       struct list_head *lp;
134 +
135 +       /* verify this dentry is really new */
136 +       assert(dentry->d_inode == NULL);
137 +       assert(list_empty(&dentry->d_alias));           /* d_instantiate */
138 +       if (rehash)
139 +               assert(d_unhashed(dentry));             /* d_rehash */
140 +       assert(list_empty(&dentry->d_subdirs));
141 +
142 +       spin_lock(&dcache_lock);
143 +       if (!inode) /* negative dentry: nothing to bind */
144 +               goto do_rehash;
145 +
146 +       if (!test_opt(inode->i_sb, IOPEN)) /* iopen not enabled on this mount */
147 +               goto do_instantiate;
148 +
149 +       /* look for a disconnected alias (see iopen_lookup()) */
150 +       list_for_each(lp, &inode->i_dentry) {
151 +               tmp = list_entry(lp, struct dentry, d_alias);
152 +               if (tmp->d_flags & DCACHE_DISCONNECTED) { /* must be sole alias */
153 +                       assert(tmp->d_alias.next == &inode->i_dentry);
154 +                       assert(tmp->d_alias.prev == &inode->i_dentry);
155 +                       goal = tmp;
156 +                       dget_locked(goal);
157 +                       break;
158 +               }
159 +       }
160 +
161 +       if (!goal)
162 +               goto do_instantiate;
163 +
164 +       /* Connect goal and move it to the name and parent of dentry */
165 +       goal->d_flags &= ~DCACHE_DISCONNECTED;
166 +       security_d_instantiate(goal, inode);
167 +       __d_drop(dentry);
168 +       d_rehash_cond(dentry, 0);
169 +       d_move_locked(goal, dentry);
170 +       spin_unlock(&dcache_lock);
171 +       iput(inode); /* drop the reference passed in with @inode */
172 +
173 +       return goal;
174 +
175 +       /* d_add(), but don't drop dcache_lock before adding dentry to inode */
176 +do_instantiate:
177 +       list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
178 +       dentry->d_inode = inode;
179 +do_rehash:
180 +       if (rehash)
181 +               d_rehash_cond(dentry, 0);
182 +       spin_unlock(&dcache_lock);
183 +
184 +       return NULL;
185 +}
186 +
187 +/*
188 + * Similar to d_instantiate(), except that it first unhashes and unlinks the
189 + * inode's disconnected alias, if there is one.
190 + */
191 +void iopen_d_instantiate(struct dentry *dentry, struct inode * inode)
192 +{
193 +       struct dentry *dis_dentry;
194 +
195 +       /* verify this dentry is really new */
196 +       assert(dentry->d_inode == NULL);
197 +       assert(list_empty(&dentry->d_alias));
198 +
199 +       spin_lock(&dcache_lock);
200 +       if (!inode || !test_opt(inode->i_sb, IOPEN) ||
201 +           list_empty(&inode->i_dentry))
202 +               goto do_instantiate;
203 +
204 +       /* a disconnected dentry has been added behind our back,
205 +        * we have to drop that dentry, see bug 16362/15713 */
206 +       dis_dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
207 +       spin_lock(&dis_dentry->d_lock);
208 +       assert(dis_dentry->d_alias.next == &inode->i_dentry);
209 +       assert(dis_dentry->d_alias.prev == &inode->i_dentry);
210 +       assert(dis_dentry->d_flags & DCACHE_DISCONNECTED);
211 +       __d_drop(dis_dentry); /* unhash it */
212 +       list_del_init(&dis_dentry->d_alias); /* detach it from the inode */
213 +       spin_unlock(&dis_dentry->d_lock);
214 +
215 +do_instantiate:
216 +       if (inode)
217 +               list_add(&dentry->d_alias, &inode->i_dentry);
218 +       dentry->d_inode = inode;
219 +       spin_unlock(&dcache_lock);
220 +       security_d_instantiate(dentry, inode);
221 +}
222 +
223 +/*
224 + * Operation tables that ext4_iopen_get_inode() installs on the iopen pseudo
225 + * directory inode.  Only a lookup method and generic_read_dir are provided. */
226 +
227 +static struct inode_operations iopen_inode_operations = {
228 +       lookup:         iopen_lookup,           /* BKL held */
229 +};
230 +
231 +static struct file_operations iopen_file_operations = {
232 +       read:           generic_read_dir,
233 +};
234 +/* Return 1 if the name of @dentry is exactly @name, 0 otherwise. */
235 +static int match_dentry(struct dentry *dentry, const char *name)
236 +{
237 +       int     len;
238 +
239 +       len = strlen(name);
240 +       if (dentry->d_name.len != len) /* lengths must be equal */
241 +               return 0;
242 +       if (strncmp(dentry->d_name.name, name, len))
243 +               return 0;
244 +       return 1;
245 +}
246 +
247 +/*
248 + * This function is spliced into ext4_lookup() and returns 1 if the name is
249 + * __iopen__ in the root directory of an iopen mount and @dentry has been
250 + * bound to the pseudo directory inode (EXT4_BAD_INO); otherwise returns 0. */
251 +int ext4_check_for_iopen(struct inode *dir, struct dentry *dentry)
252 +{
253 +       struct inode *inode;
254 +
255 +       if (dir->i_ino != EXT4_ROOT_INO ||
256 +           !test_opt(dir->i_sb, IOPEN) ||
257 +           !match_dentry(dentry, "__iopen__"))
258 +               return 0;
259 +
260 +       inode = ext4_iget(dir->i_sb, EXT4_BAD_INO);
261 +       if (IS_ERR(inode))
262 +               return 0; /* ext4_lookup() then does its normal lookup */
263 +
264 +       d_add(dentry, inode);
265 +       return 1;
266 +}
267 +
268 +/*
269 + * This function is spliced into ext4_iget(); it returns 1 if the inode
270 + * number is the one used for /__iopen__ (EXT4_BAD_INO), in which case the
271 + * inode is filled in as a directory with uid and gid 0.  Otherwise, this
272 + * function returns 0 without modifying the inode. */
273 +int ext4_iopen_get_inode(struct inode *inode)
274 +{
275 +       if (inode->i_ino != EXT4_BAD_INO)
276 +               return 0;
277 +
278 +       inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; /* dr-x------ */
279 +       if (test_opt(inode->i_sb, IOPEN_NOPRIV))
280 +               inode->i_mode |= 0777; /* drwxrwxrwx */
281 +       inode->i_uid = 0;
282 +       inode->i_gid = 0;
283 +       inode->i_nlink = 1;
284 +       inode->i_size = 4096;
285 +       inode->i_atime = inode->i_ctime = inode->i_mtime =  ext4_current_time(inode);
286 +       EXT4_I(inode)->i_dtime = 0;
287 +       EXT4_I(inode)->i_file_acl = 0;
288 +       inode->i_blocks = 0;
289 +       inode->i_version = 1;
290 +       inode->i_generation = 0;
291 +
292 +       inode->i_op = &iopen_inode_operations;
293 +       inode->i_fop = &iopen_file_operations;
294 +       inode->i_mapping->a_ops = 0; /* no address space operations */
295 +
296 +       if (inode->i_state & I_NEW)
297 +               unlock_new_inode(inode);
298 +
299 +       return 1;
300 +}
301 Index: linux-2.6.27.21-0.1/fs/ext4/iopen.h
302 ===================================================================
303 --- /dev/null
304 +++ linux-2.6.27.21-0.1/fs/ext4/iopen.h
305 @@ -0,0 +1,16 @@
306 +/*
307 + * iopen.h
308 + *
309 + * Special support for opening files by inode number.
310 + *
311 + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
312 + *
313 + * This file may be redistributed under the terms of the GNU General
314 + * Public License.
315 + */
316 +
317 +extern int ext4_check_for_iopen(struct inode *dir, struct dentry *dentry);
318 +extern int ext4_iopen_get_inode(struct inode *inode);
319 +extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
320 +                                          struct inode *inode, int rehash);
321 +extern void iopen_d_instantiate(struct dentry *dentry, struct inode * inode);
322 Index: linux-2.6.27.21-0.1/fs/ext4/inode.c
323 ===================================================================
324 --- linux-2.6.27.21-0.1.orig/fs/ext4/inode.c
325 +++ linux-2.6.27.21-0.1/fs/ext4/inode.c
326 @@ -38,6 +38,7 @@
327  #include <linux/bio.h>
328  #include "ext4_jbd2.h"
329  #include "xattr.h"
330 +#include "iopen.h"
331  #include "acl.h"
332  #include "ext4_extents.h"
333  
334 @@ -4115,6 +4116,9 @@ struct inode *ext4_iget(struct super_blo
335         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
336  #endif
337  
338 +       if (ext4_iopen_get_inode(inode))
339 +               return inode;
340 +
341         ret = __ext4_get_inode_loc(inode, &iloc, 0);
342         if (ret < 0)
343                 goto bad_inode;
344 Index: linux-2.6.27.21-0.1/fs/ext4/super.c
345 ===================================================================
346 --- linux-2.6.27.21-0.1.orig/fs/ext4/super.c
347 +++ linux-2.6.27.21-0.1/fs/ext4/super.c
348 @@ -955,7 +955,8 @@ enum {
349         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
350         Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
351         Opt_stripe, Opt_delalloc, Opt_nodelalloc,
352 -       Opt_inode_readahead_blks
353 +       Opt_inode_readahead_blks,
354 +       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
355  };
356  
357  static const match_table_t tokens = {
358 @@ -1004,6 +1005,9 @@ static const match_table_t tokens = {
359         {Opt_noquota, "noquota"},
360         {Opt_quota, "quota"},
361         {Opt_usrquota, "usrquota"},
362 +       {Opt_iopen, "iopen"},
363 +       {Opt_noiopen, "noiopen"},
364 +       {Opt_iopen_nopriv, "iopen_nopriv"},
365         {Opt_barrier, "barrier=%u"},
366         {Opt_extents, "extents"},
367         {Opt_noextents, "noextents"},
368 @@ -1347,6 +1351,18 @@ set_qf_format:
369                         else
370                                 clear_opt(sbi->s_mount_opt, BARRIER);
371                         break;
372 +               case Opt_iopen:
373 +                       set_opt (sbi->s_mount_opt, IOPEN);
374 +                       clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
375 +                       break;
376 +               case Opt_noiopen:
377 +                       clear_opt (sbi->s_mount_opt, IOPEN);
378 +                       clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
379 +                       break;
380 +               case Opt_iopen_nopriv:
381 +                       set_opt (sbi->s_mount_opt, IOPEN);
382 +                       set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
383 +                       break;
384                 case Opt_ignore:
385                         break;
386                 case Opt_resize:
387 Index: linux-2.6.27.21-0.1/fs/ext4/namei.c
388 ===================================================================
389 --- linux-2.6.27.21-0.1.orig/fs/ext4/namei.c
390 +++ linux-2.6.27.21-0.1/fs/ext4/namei.c
391 @@ -39,6 +39,7 @@
392  
393  #include "namei.h"
394  #include "xattr.h"
395 +#include "iopen.h"
396  #include "acl.h"
397  
398  /*
399 @@ -1054,6 +1055,9 @@ static struct dentry *ext4_lookup(struct
400         if (dentry->d_name.len > EXT4_NAME_LEN)
401                 return ERR_PTR(-ENAMETOOLONG);
402  
403 +       if (ext4_check_for_iopen(dir, dentry))
404 +               return NULL;
405 +
406         bh = ext4_find_entry(dir, &dentry->d_name, &de);
407         inode = NULL;
408         if (bh) {
409 @@ -1068,7 +1072,8 @@ static struct dentry *ext4_lookup(struct
410                 if (IS_ERR(inode))
411                         return ERR_CAST(inode);
412         }
413 -       return d_splice_alias(inode, dentry);
414 +
415 +       return iopen_connect_dentry(dentry, inode, 1);
416  }
417  
418  
419 @@ -1717,7 +1722,7 @@ static int ext4_add_nondir(handle_t *han
420         int err = ext4_add_entry(handle, dentry, inode);
421         if (!err) {
422                 ext4_mark_inode_dirty(handle, inode);
423 -               d_instantiate(dentry, inode);
424 +               iopen_d_instantiate(dentry, inode);
425                 return 0;
426         }
427         drop_nlink(inode);
428 @@ -1876,7 +1881,7 @@ out_clear_inode:
429         ext4_inc_count(handle, dir);
430         ext4_update_dx_flag(dir);
431         ext4_mark_inode_dirty(handle, dir);
432 -       d_instantiate(dentry, inode);
433 +       iopen_d_instantiate(dentry, inode);
434  out_stop:
435         ext4_journal_stop(handle);
436         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
437 @@ -2142,10 +2147,6 @@ static int ext4_rmdir(struct inode *dir,
438                              inode->i_nlink);
439         inode->i_version++;
440         clear_nlink(inode);
441 -       /* There's no need to set i_disksize: the fact that i_nlink is
442 -        * zero will ensure that the right thing happens during any
443 -        * recovery. */
444 -       inode->i_size = 0;
445         ext4_orphan_add(handle, inode);
446         inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
447         ext4_mark_inode_dirty(handle, inode);
448 @@ -2271,6 +2272,23 @@ out_stop:
449         return err;
450  }
451  
452 +/* Like ext4_add_nondir(), but instantiates via iopen_connect_dentry() */
453 +static int ext4_add_link(handle_t *handle, struct dentry *dentry,
454 +                       struct inode *inode)
455 +{
456 +       int err = ext4_add_entry(handle, dentry, inode);
457 +       if (!err) {
458 +               err = ext4_mark_inode_dirty(handle, inode);
459 +               if (err == 0) {
460 +                       dput(iopen_connect_dentry(dentry, inode, 0));
461 +                       return 0;
462 +               }
463 +       }
464 +       ext4_dec_count(handle, inode); /* undo ext4_link()'s ext4_inc_count() */
465 +       iput(inode); /* drop the i_count reference taken in ext4_link() */
466 +       return err;
467 +}
468 +
469  static int ext4_link(struct dentry *old_dentry,
470                      struct inode *dir, struct dentry *dentry)
471  {
472 @@ -2301,7 +2319,8 @@ retry:
473         ext4_inc_count(handle, inode);
474         atomic_inc(&inode->i_count);
475  
476 -       err = ext4_add_nondir(handle, dentry, inode);
477 +       err = ext4_add_link(handle, dentry, inode);
478 +       ext4_orphan_del(handle, inode);
479         ext4_journal_stop(handle);
480         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
481                 goto retry;
482 Index: linux-2.6.27.21-0.1/fs/ext4/Makefile
483 ===================================================================
484 --- linux-2.6.27.21-0.1.orig/fs/ext4/Makefile
485 +++ linux-2.6.27.21-0.1/fs/ext4/Makefile
486 @@ -4,7 +4,7 @@
487  
488  obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
489  
490 -ext4dev-y      := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
491 +ext4dev-y      := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
492                    ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
493                    ext4_jbd2.o migrate.o mballoc.o
494  
495 Index: linux-2.6.27.21-0.1/fs/ext4/ext4.h
496 ===================================================================
497 --- linux-2.6.27.21-0.1.orig/fs/ext4/ext4.h
498 +++ linux-2.6.27.21-0.1/fs/ext4/ext4.h
499 @@ -540,6 +540,8 @@ do {                                                                               \
500  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT        0x1000000 /* Journal Async Commit */
501  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
502  #define EXT4_MOUNT_DELALLOC            0x8000000 /* Delalloc support */
503 +#define EXT4_MOUNT_IOPEN               0x10000000 /* Allow access via iopen */
504 +#define EXT4_MOUNT_IOPEN_NOPRIV                0x20000000 /* Make iopen world-readable */
505  /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
506  #ifndef _LINUX_EXT2_FS_H
507  #define clear_opt(o, opt)              o &= ~EXT4_MOUNT_##opt