Whamcloud - gitweb
Land b1_2 onto HEAD (20040317_2319)
[fs/lustre-release.git] / lustre / kernel_patches / patches / iopen-2.4.21-chaos.patch
1  Documentation/filesystems/ext2.txt |   16 ++
2  fs/ext3/Makefile                   |    2 
3  fs/ext3/inode.c                    |    4 
4  fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
5  fs/ext3/iopen.h                    |   13 +
6  fs/ext3/namei.c                    |   13 +
7  fs/ext3/super.c                    |   11 +
8  include/linux/ext3_fs.h            |    2 
9  8 files changed, 318 insertions(+), 2 deletions(-)
10
11 Index: linux-ia64/Documentation/filesystems/ext2.txt
12 ===================================================================
13 --- linux-ia64.orig/Documentation/filesystems/ext2.txt  2004-03-17 15:47:15.000000000 -0800
14 +++ linux-ia64/Documentation/filesystems/ext2.txt       2004-03-17 18:03:15.000000000 -0800
15 @@ -35,6 +35,22 @@ resgid=n                     The group ID which may use th
16  
17  sb=n                           Use alternate superblock at this location.
18  
19 +iopen                          Makes an invisible pseudo-directory called
20 +                               __iopen__ available in the root directory
21 +                               of the filesystem.  Allows open-by-inode-
22 +                               number.  i.e., inode 3145 can be accessed
23 +                               via /mntpt/__iopen__/3145
24 +
25 +iopen_nopriv                   This option makes the iopen directory be
26 +                               world-readable.  This may be safer since it
27 +                               allows daemons to run as an unprivileged user,
28 +                               however it significantly changes the security
29 +                               model of a Unix filesystem, since previously
30 +                               all files under a mode 700 directory were not
31 +                               generally available even if the
32 +                               permissions on the file itself are
33 +                               world-readable.
34 +
35  grpquota,noquota,quota,usrquota        Quota options are silently ignored by ext2.
36  
37  
38 Index: linux-ia64/fs/ext3/Makefile
39 ===================================================================
40 --- linux-ia64.orig/fs/ext3/Makefile    2004-03-17 18:03:14.000000000 -0800
41 +++ linux-ia64/fs/ext3/Makefile 2004-03-17 18:03:15.000000000 -0800
42 @@ -11,7 +11,7 @@ O_TARGET := ext3.o
43  
44  export-objs := ext3-exports.o
45  
46 -obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
47 +obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
48                 ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
49  obj-m    := $(O_TARGET)
50  
51 Index: linux-ia64/fs/ext3/inode.c
52 ===================================================================
53 --- linux-ia64.orig/fs/ext3/inode.c     2004-03-17 18:03:15.000000000 -0800
54 +++ linux-ia64/fs/ext3/inode.c  2004-03-17 18:10:36.000000000 -0800
55 @@ -34,6 +34,7 @@
56  #include <linux/highuid.h>
57  #include <linux/quotaops.h>
58  #include <linux/module.h>
59 +#include "iopen.h"
60  
61  /*
62   * SEARCH_FROM_ZERO forces each block allocation to search from the start
63 @@ -2430,6 +2431,9 @@ void ext3_read_inode(struct inode * inod
64         struct buffer_head *bh;
65         int block;
66         
67 +       if (ext3_iopen_get_inode(inode))
68 +               return;
69 +
70         if(ext3_get_inode_loc(inode, &iloc))
71                 goto bad_inode;
72         bh = iloc.bh;
73 Index: linux-ia64/fs/ext3/iopen.c
74 ===================================================================
75 --- linux-ia64.orig/fs/ext3/iopen.c     2004-03-17 18:02:08.000000000 -0800
76 +++ linux-ia64/fs/ext3/iopen.c  2004-03-17 18:10:58.000000000 -0800
77 @@ -8,3 +8,275 @@
78   * This file may be redistributed under the terms of the GNU General
79   * Public License.
80   *
81 + *
82 + * Invariants:
83 + *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
84 + *     for an inode at one time.
85 + *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
86 + *     aliases on an inode at the same time.
87 + *
88 + * If we have any connected dentry aliases for an inode, use one of those
89 + * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
90 + * dentry for this inode, which thereafter will be found by the dcache
91 + * when looking up this inode number in __iopen__, so we don't return here
92 + * until it is gone.
93 + *
94 + * If we get an inode via a regular name lookup, then we "rename" the
95 + * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
96 + * existing users of the disconnected dentry will continue to use the same
97 + * dentry as the connected users, and there will never be both kinds of
98 + * dentry aliases at one time.
99 + */
100 +
101 +#include <linux/sched.h>
102 +#include <linux/fs.h>
103 +#include <linux/locks.h>
104 +#include <linux/ext3_jbd.h>
105 +#include <linux/jbd.h>
106 +#include <linux/ext3_fs.h>
107 +#include <linux/smp_lock.h>
108 +#include "iopen.h"
109 +
110 +#ifndef assert
111 +#define assert(test) J_ASSERT(test)
112 +#endif
113 +
114 +#define IOPEN_NAME_LEN 32
115 +
116 +/* Lookup method of the iopen pseudo-directory: the name is ".", ".." or an
117 + * inode number.  Returns an existing dentry alias of that inode (with a new
118 + * reference), NULL after instantiating 'dentry' itself, or an ERR_PTR. */
119 +static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry)
120 +{
121 +       struct inode *inode;
122 +       unsigned long ino;
123 +       struct list_head *lp;
124 +       struct dentry *alternate;
125 +       char buf[IOPEN_NAME_LEN];
126 +
127 +       if (dentry->d_name.len >= IOPEN_NAME_LEN)       /* buf needs room for the NUL */
128 +               return ERR_PTR(-ENAMETOOLONG);
129 +
130 +       memcpy(buf, dentry->d_name.name, dentry->d_name.len);
131 +       buf[dentry->d_name.len] = 0;
132 +
133 +       if (strcmp(buf, ".") == 0)
134 +               ino = dir->i_ino;       /* "." names the directory being searched */
135 +       else if (strcmp(buf, "..") == 0)
136 +               ino = EXT3_ROOT_INO;    /* __iopen__ is only created in the root directory */
137 +       else
138 +               ino = simple_strtoul(buf, 0, 0);        /* NOTE(review): no end pointer is checked, so trailing non-digits appear to be ignored */
139 +
140 +       if ((ino != EXT3_ROOT_INO &&
141 +            //ino != EXT3_ACL_IDX_INO &&
142 +            //ino != EXT3_ACL_DATA_INO &&
143 +            ino < EXT3_FIRST_INO(dir->i_sb)) ||
144 +           ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
145 +               return ERR_PTR(-ENOENT);        /* below EXT3_FIRST_INO (root excepted) or above s_inodes_count */
146 +
147 +       inode = iget(dir->i_sb, ino);
148 +       if (!inode)
149 +               return ERR_PTR(-EACCES);
150 +       if (is_bad_inode(inode)) {
151 +               iput(inode);
152 +               return ERR_PTR(-ENOENT);
153 +       }
154 +
155 +       assert(list_empty(&dentry->d_alias));           /* d_instantiate */
156 +       assert(list_empty(&dentry->d_hash));            /* d_rehash */
157 +
158 +       /* preferably return a connected dentry */
159 +       spin_lock(&dcache_lock);
160 +       list_for_each(lp, &inode->i_dentry) {
161 +               alternate = list_entry(lp, struct dentry, d_alias);
162 +               assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
163 +       }
164 +
165 +       if (!list_empty(&inode->i_dentry)) {
166 +               alternate = list_entry(inode->i_dentry.next,
167 +                                      struct dentry, d_alias);
168 +               dget_locked(alternate);         /* reference handed to the caller */
169 +               alternate->d_vfs_flags |= DCACHE_REFERENCED;
170 +               iput(inode);                    /* drop the reference taken by iget() */
171 +               spin_unlock(&dcache_lock);
172 +               return alternate;
173 +       }
174 +       dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;    /* no alias exists: this becomes the single disconnected one */
175 +
176 +       /* d_add(), but don't drop dcache_lock before adding dentry to inode */
177 +       list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
178 +       dentry->d_inode = inode;
179 +
180 +       __d_rehash(dentry, 0);                          /* d_rehash */
181 +       spin_unlock(&dcache_lock);
182 +
183 +       return NULL;    /* 'dentry' itself now refers to the inode */
184 +}
185 +
186 +#define do_switch(x,y) do { \
187 +       __typeof__ (x) __tmp = x; \
188 +       x = y; y = __tmp; } while (0)   /* exchange the values of lvalues x and y */
189 +
190 +static inline void switch_names(struct dentry *dentry, struct dentry *target)
191 +{
192 +       const unsigned char *old_name, *new_name;       /* old_name ends up in dentry, new_name in target */
193 +
194 +       memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);     /* copy target's inline name buffer into dentry's */
195 +       old_name = target->d_name.name;
196 +       new_name = dentry->d_name.name;
197 +       if (old_name == target->d_iname)
198 +               old_name = dentry->d_iname;     /* target's name was inline: use the copy made above */
199 +       if (new_name == dentry->d_iname)
200 +               new_name = target->d_iname;     /* dentry's name was inline: target points at its own inline buffer */
201 +       target->d_name.name = new_name;
202 +       dentry->d_name.name = old_name;
203 +}      /* d_name.len and d_name.hash are not touched here */
204 +
205 +/* Spliced into ext3_lookup and ext3_add_link.  If 'inode' has a disconnected
206 + * alias, that alias takes over dentry's name and parent and is returned with a
207 + * reference held; otherwise 'dentry' is attached to 'inode' (if any) and NULL is returned. */
208 +struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode,
209 +                                   int rehash)
210 +{
211 +       struct dentry *tmp, *goal = NULL;
212 +       struct list_head *lp;
213 +
214 +       /* verify this dentry is really new */
215 +       assert(dentry->d_inode == NULL);
216 +       assert(list_empty(&dentry->d_alias));           /* d_instantiate */
217 +       if (rehash)
218 +               assert(list_empty(&dentry->d_hash));    /* d_rehash */
219 +       assert(list_empty(&dentry->d_subdirs));
220 +
221 +       spin_lock(&dcache_lock);
222 +       if (!inode)
223 +               goto do_rehash;         /* no inode: nothing to connect, dentry stays negative */
224 +
225 +       /* look for a disconnected alias of this inode */
226 +       list_for_each(lp, &inode->i_dentry) {
227 +               tmp = list_entry(lp, struct dentry, d_alias);
228 +               if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) {
229 +                       assert(tmp->d_alias.next == &inode->i_dentry);  /* it must be the only alias */
230 +                       assert(tmp->d_alias.prev == &inode->i_dentry);
231 +                       goal = tmp;
232 +                       dget_locked(goal);      /* reference handed to the caller */
233 +                       break;
234 +               }
235 +       }
236 +
237 +       if (!goal)
238 +               goto do_instantiate;
239 +
240 +       /* Move the goal to the de hash queue - like d_move() */
241 +       goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
242 +       list_del_init(&goal->d_hash);
243 +
244 +       list_del(&goal->d_child);
245 +       list_del(&dentry->d_child);
246 +
247 +       /* Switch the parents and the names.. */
248 +       switch_names(goal, dentry);
249 +       do_switch(goal->d_parent, dentry->d_parent);
250 +       do_switch(goal->d_name.len, dentry->d_name.len);
251 +       do_switch(goal->d_name.hash, dentry->d_name.hash);
252 +
253 +       /* And add them back to the (new) parent lists */
254 +       list_add(&goal->d_child, &goal->d_parent->d_subdirs);
255 +       list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
256 +       __d_rehash(goal, 0);
257 +       spin_unlock(&dcache_lock);
258 +       iput(inode);    /* presumably releases the reference the caller passed in, since 'dentry' was not instantiated -- confirm */
259 +
260 +       return goal;
261 +
262 +       /* d_add(), but don't drop dcache_lock before adding dentry to inode */
263 +do_instantiate:
264 +       list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
265 +       dentry->d_inode = inode;
266 +do_rehash:
267 +       if (rehash)
268 +               __d_rehash(dentry, 0);                  /* d_rehash */
269 +       spin_unlock(&dcache_lock);
270 +
271 +       return NULL;
272 +}
273 +
274 +/*
275 + * These are the special structures for the iopen pseudo directory.
276 + */
277 +
278 +static struct inode_operations iopen_inode_operations = {
279 +       lookup:         iopen_lookup,           /* BKL held */
280 +};     /* installed as i_op by ext3_iopen_get_inode() */
281 +
282 +static struct file_operations iopen_file_operations = {
283 +       read:           generic_read_dir,       /* only read is set; no readdir method is provided */
284 +};     /* installed as i_fop by ext3_iopen_get_inode() */
285 +
286 +static int match_dentry(struct dentry *dentry, const char *name)
287 +{      /* Returns 1 when dentry's name is exactly 'name', 0 otherwise. */
288 +       int     len;
289 +
290 +       len = strlen(name);
291 +       if (dentry->d_name.len != len)
292 +               return 0;       /* different length */
293 +       if (strncmp(dentry->d_name.name, name, len))
294 +               return 0;       /* same length, different bytes */
295 +       return 1;
296 +}
297 +
298 +/*
299 + * This function is spliced into ext3_lookup.  It returns 1 if the name is
300 + * __iopen__ in the root directory of a filesystem mounted with iopen and
301 + * dentry has been filled in; otherwise it returns 0 and dentry is untouched. */
302 +int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
303 +{
304 +       struct inode *inode;
305 +
306 +       if (dir->i_ino != EXT3_ROOT_INO ||              /* only in the root directory */
307 +           !test_opt(dir->i_sb, IOPEN) ||              /* only when the IOPEN mount flag is set */
308 +           !match_dentry(dentry, "__iopen__"))
309 +               return 0;
310 +
311 +       inode = iget(dir->i_sb, EXT3_BAD_INO);  /* the pseudo-directory uses inode number EXT3_BAD_INO */
312 +
313 +       if (!inode)
314 +               return 0;       /* ext3_lookup then continues with its normal directory search */
315 +       d_add(dentry, inode);
316 +       return 1;
317 +}
318 +
319 +/*
320 + * This function is spliced into read_inode; it returns 1 if inode
321 + * number is the one for /__iopen__, in which case the inode is filled
322 + * in appropriately.  Otherwise, this function returns 0.
323 + */
324 +int ext3_iopen_get_inode(struct inode *inode)
325 +{
326 +       if (inode->i_ino != EXT3_BAD_INO)       /* NOTE(review): the IOPEN mount option is not checked here */
327 +               return 0;
328 +
329 +       inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;    /* directory, r-x for the owner only */
330 +       if (test_opt(inode->i_sb, IOPEN_NOPRIV))
331 +               inode->i_mode |= 0777;  /* iopen_nopriv: rwx for user, group and other */
332 +       inode->i_uid = 0;
333 +       inode->i_gid = 0;
334 +       inode->i_nlink = 1;
335 +       inode->i_size = 4096;
336 +       inode->i_atime = CURRENT_TIME;
337 +       inode->i_ctime = CURRENT_TIME;
338 +       inode->i_mtime = CURRENT_TIME;
339 +       inode->u.ext3_i.i_dtime = 0;
340 +       inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size
341 +                                        * (for stat), not the fs block
342 +                                        * size */
343 +       inode->i_blocks = 0;
344 +       inode->i_version = 1;
345 +       inode->i_generation = 0;
346 +
347 +       inode->i_op = &iopen_inode_operations;
348 +       inode->i_fop = &iopen_file_operations;
349 +       inode->i_mapping->a_ops = 0;    /* no address-space operations */
350 +
351 +       return 1;       /* ext3_read_inode returns immediately when it sees this */
352 +}
353 Index: linux-ia64/fs/ext3/iopen.h
354 ===================================================================
355 --- linux-ia64.orig/fs/ext3/iopen.h     2004-03-17 15:47:15.000000000 -0800
356 +++ linux-ia64/fs/ext3/iopen.h  2004-03-17 18:03:15.000000000 -0800
357 @@ -0,0 +1,15 @@
358 +/*
359 + * iopen.h
360 + *
361 + * Special support for opening files by inode number.
362 + *
363 + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
364 + *
365 + * This file may be redistributed under the terms of the GNU General
366 + * Public License.
367 + */
368 +
369 +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
370 +extern int ext3_iopen_get_inode(struct inode *inode);
371 +extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
372 +                                          struct inode *inode, int rehash);
373 Index: linux-ia64/fs/ext3/namei.c
374 ===================================================================
375 --- linux-ia64.orig/fs/ext3/namei.c     2004-03-17 18:03:15.000000000 -0800
376 +++ linux-ia64/fs/ext3/namei.c  2004-03-17 18:10:35.000000000 -0800
377 @@ -36,7 +36,7 @@
378  #include <linux/string.h>
379  #include <linux/locks.h>
380  #include <linux/quotaops.h>
381 -
382 +#include "iopen.h"
383  
384  /*
385   * define how far ahead to read directories while searching them.
386 @@ -932,6 +932,9 @@ static struct dentry *ext3_lookup(struct
387         if (dentry->d_name.len > EXT3_NAME_LEN)
388                 return ERR_PTR(-ENAMETOOLONG);
389  
390 +       if (ext3_check_for_iopen(dir, dentry))
391 +               return NULL;
392 +
393         bh = ext3_find_entry(dentry, &de);
394         inode = NULL;
395         if (bh) {
396 @@ -943,8 +946,8 @@ static struct dentry *ext3_lookup(struct
397                         return ERR_PTR(-EACCES);
398                 }
399         }
400 -       d_add(dentry, inode);
401 -       return NULL;
402 +
403 +       return iopen_connect_dentry(dentry, inode, 1);
404  }
405  
406  #define S_SHIFT 12
407 @@ -1935,10 +1938,6 @@ static int ext3_rmdir (struct inode * di
408                               inode->i_nlink);
409         inode->i_version = ++event;
410         inode->i_nlink = 0;
411 -       /* There's no need to set i_disksize: the fact that i_nlink is
412 -        * zero will ensure that the right thing happens during any
413 -        * recovery. */
414 -       inode->i_size = 0;
415         ext3_orphan_add(handle, inode);
416         dir->i_nlink--;
417         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
418 @@ -2057,6 +2056,23 @@ out_stop:
419         return err;
420  }
421  
422 +/* Like ext3_add_nondir() except for call to iopen_connect_dentry */
423 +static int ext3_add_link(handle_t *handle, struct dentry *dentry,
424 +                        struct inode *inode)
425 +{      /* returns 0 on success, else the ext3_add_entry/ext3_mark_inode_dirty error */
426 +       int err = ext3_add_entry(handle, dentry, inode);
427 +       if (!err) {
428 +               err = ext3_mark_inode_dirty(handle, inode);
429 +               if (err == 0) {
430 +                       dput(iopen_connect_dentry(dentry, inode, 0));   /* a returned alias carries a dget reference; release it (dput(NULL) is a no-op) */
431 +                       return 0;
432 +               }
433 +       }
434 +       ext3_dec_count(handle, inode);  /* failure: undo the link count bump made by ext3_link */
435 +       iput(inode);                    /* ... and the i_count reference it took */
436 +       return err;
437 +}
438 +
439  static int ext3_link (struct dentry * old_dentry,
440                 struct inode * dir, struct dentry *dentry)
441  {
442 @@ -2084,7 +2100,8 @@ static int ext3_link (struct dentry * ol
443         ext3_inc_count(handle, inode);
444         atomic_inc(&inode->i_count);
445  
446 -       err = ext3_add_nondir(handle, dentry, inode);
447 +       err = ext3_add_link(handle, dentry, inode);
448 +       ext3_orphan_del(handle, inode);
449         ext3_journal_stop(handle, dir);
450         return err;
451  }
452 Index: linux-ia64/fs/ext3/super.c
453 ===================================================================
454 --- linux-ia64.orig/fs/ext3/super.c     2004-03-17 18:03:14.000000000 -0800
455 +++ linux-ia64/fs/ext3/super.c  2004-03-17 18:10:35.000000000 -0800
456 @@ -891,6 +891,18 @@ static int parse_options (char * options
457                          || !strcmp (this_char, "quota")
458                          || !strcmp (this_char, "usrquota"))
459                         /* Don't do anything ;-) */ ;
460 +               else if (!strcmp (this_char, "iopen")) {
461 +                       set_opt (sbi->s_mount_opt, IOPEN);
462 +                       clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
463 +               }
464 +               else if (!strcmp (this_char, "noiopen")) {
465 +                       clear_opt (sbi->s_mount_opt, IOPEN);
466 +                       clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
467 +               }
468 +               else if (!strcmp (this_char, "iopen_nopriv")) {
469 +                       set_opt (sbi->s_mount_opt, IOPEN);
470 +                       set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
471 +               }
472                 else if (!strcmp (this_char, "journal")) {
473                         /* @@@ FIXME */
474                         /* Eventually we will want to be able to create
475 Index: linux-ia64/include/linux/ext3_fs.h
476 ===================================================================
477 --- linux-ia64.orig/include/linux/ext3_fs.h     2004-03-17 18:03:15.000000000 -0800
478 +++ linux-ia64/include/linux/ext3_fs.h  2004-03-17 18:03:15.000000000 -0800
479 @@ -328,6 +328,8 @@ struct ext3_inode {
480  #define EXT3_MOUNT_XATTR_USER          0x4000  /* Extended user attributes */
481  #define EXT3_MOUNT_POSIX_ACL           0x8000  /* POSIX Access Control Lists */
482  #define EXT3_MOUNT_ASYNCDEL            0x20000 /* Delayed deletion */
483 +#define EXT3_MOUNT_IOPEN               0x40000 /* Allow access via iopen */
484 +#define EXT3_MOUNT_IOPEN_NOPRIV                0x80000 /* Make iopen world-readable */
485  
486  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
487  #ifndef _LINUX_EXT2_FS_H