Whamcloud - gitweb
b=7264
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-reserve-inode-space-2.6.7.patch
1 Index: linux-2.6.7/fs/ext3/ialloc.c
2 ===================================================================
3 --- linux-2.6.7.orig/fs/ext3/ialloc.c   2004-08-27 14:28:16.000000000 +0800
4 +++ linux-2.6.7/fs/ext3/ialloc.c        2004-08-27 14:29:21.000000000 +0800
5 @@ -43,6 +43,24 @@
6   * the free blocks count in the block.
7   */
8  
9 +/*
10 + * Simple reservation policy: inodes whose mode carries EXT3_S_INRESERVE may
11 + * use only the last group; all other inodes may use any group but the last.
12 + * A specific group for reserved inodes may become selectable later -bzzz */
13 +static inline int ext3_group_allowed(struct super_block *sb, int mode, int group)
14 +{
15 +       if (!test_opt(sb, INRESERVE) || EXT3_SB(sb)->s_groups_count == 1)
16 +               return 1; /* mount option off, or only one group: no restriction */
17 +
18 +       if (mode & EXT3_S_INRESERVE) {
19 +               if (group != EXT3_SB(sb)->s_groups_count - 1)
20 +                       return 0; /* reserved inodes: last group only */
21 +       } else {
22 +               if (group == EXT3_SB(sb)->s_groups_count -1 )
23 +                       return 0; /* ordinary inodes: last group is off limits */
24 +       }
25 +       return 1;
26 +}
27  
28  /*
29   * Read the inode allocation bitmap for a given block_group, reading
30 @@ -203,7 +221,7 @@
31   * For other inodes, search forward from the parent directory\'s block
32   * group to find a free inode.
33   */
34 -static int find_group_dir(struct super_block *sb, struct inode *parent)
35 +static int find_group_dir(struct super_block *sb, struct inode *parent, int mode)
36  {
37         int ngroups = EXT3_SB(sb)->s_groups_count;
38         int freei, avefreei;
39 @@ -215,6 +233,8 @@
40         avefreei = freei / ngroups;
41  
42         for (group = 0; group < ngroups; group++) {
43 +               if (!ext3_group_allowed(sb, mode, group))
44 +                       continue;
45                 desc = ext3_get_group_desc (sb, group, &bh);
46                 if (!desc || !desc->bg_free_inodes_count)
47                         continue;
48 @@ -258,7 +278,7 @@
49  #define INODE_COST 64
50  #define BLOCK_COST 256
51  
52 -static int find_group_orlov(struct super_block *sb, struct inode *parent)
53 +static int find_group_orlov(struct super_block *sb, struct inode *parent, int mode)
54  {
55         int parent_group = EXT3_I(parent)->i_block_group;
56         struct ext3_sb_info *sbi = EXT3_SB(sb);
57 @@ -288,6 +308,8 @@
58                 parent_group = (unsigned)group % ngroups;
59                 for (i = 0; i < ngroups; i++) {
60                         group = (parent_group + i) % ngroups;
61 +                       if (!ext3_group_allowed(sb, mode, group))
62 +                               continue;
63                         desc = ext3_get_group_desc (sb, group, &bh);
64                         if (!desc || !desc->bg_free_inodes_count)
65                                 continue;
66 @@ -357,7 +379,7 @@
67         return -1;
68  }
69  
70 -static int find_group_other(struct super_block *sb, struct inode *parent)
71 +static int find_group_other(struct super_block *sb, struct inode *parent, int mode)
72  {
73         int parent_group = EXT3_I(parent)->i_block_group;
74         int ngroups = EXT3_SB(sb)->s_groups_count;
75 @@ -393,6 +415,8 @@
76                 group += i;
77                 if (group >= ngroups)
78                         group -= ngroups;
79 +               if (!ext3_group_allowed(sb, mode, group))
80 +                       continue;
81                 desc = ext3_get_group_desc (sb, group, &bh);
82                 if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
83                                 le16_to_cpu(desc->bg_free_blocks_count))
84 @@ -407,6 +431,8 @@
85         for (i = 0; i < ngroups; i++) {
86                 if (++group >= ngroups)
87                         group = 0;
88 +               if (!ext3_group_allowed(sb, mode, group))
89 +                       continue;
90                 desc = ext3_get_group_desc (sb, group, &bh);
91                 if (desc && le16_to_cpu(desc->bg_free_inodes_count))
92                         return group;
93 @@ -502,40 +528,41 @@
94  continue_allocation:
95         if (S_ISDIR(mode)) {
96                 if (test_opt (sb, OLDALLOC))
97 -                       group = find_group_dir(sb, dir);
98 +                       group = find_group_dir(sb, dir, mode);
99                 else
100 -                       group = find_group_orlov(sb, dir);
101 +                       group = find_group_orlov(sb, dir, mode);
102         } else 
103 -               group = find_group_other(sb, dir);
104 +               group = find_group_other(sb, dir, mode);
105  
106         err = -ENOSPC;
107         if (group == -1)
108                 goto out;
109  
110         for (i = 0; i < sbi->s_groups_count; i++) {
111 -               gdp = ext3_get_group_desc(sb, group, &bh2);
112 +               if (ext3_group_allowed(sb, mode, group)) {
113 +                       gdp = ext3_get_group_desc(sb, group, &bh2);
114  
115 -               err = -EIO;
116 -               brelse(bitmap_bh);
117 -               bitmap_bh = read_inode_bitmap(sb, group);
118 -               if (!bitmap_bh)
119 -                       goto fail;
120 +                       err = -EIO;
121 +                       brelse(bitmap_bh);
122 +                       bitmap_bh = read_inode_bitmap(sb, group);
123 +                       if (!bitmap_bh)
124 +                               goto fail;
125 +
126 +                       ino = 0;
127 +
128 +       repeat_in_this_group:
129 +                       ino = ext3_find_next_zero_bit((unsigned long *)
130 +                                       bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
131 +                       if (ino < EXT3_INODES_PER_GROUP(sb)) {
132 +                               if (ext3_test_allocatable(ino, bitmap_bh)) { 
133 +                                       goto got;
134 +                               }
135 +                               J_ASSERT_BH(bitmap_bh, bh2jh(bitmap_bh)->b_committed_data);
136  
137 -               ino = 0;
138 -
139 -repeat_in_this_group:
140 -               ino = ext3_find_next_zero_bit((unsigned long *)
141 -                               bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
142 -               if (ino < EXT3_INODES_PER_GROUP(sb)) {
143 -                       if (ext3_test_allocatable(ino, bitmap_bh)) { 
144 -                               goto got;
145 +                               if (++ino < EXT3_INODES_PER_GROUP(sb))
146 +                                       goto repeat_in_this_group;
147                         }
148 -                       J_ASSERT_BH(bitmap_bh, bh2jh(bitmap_bh)->b_committed_data);
149 -
150 -                       if (++ino < EXT3_INODES_PER_GROUP(sb))
151 -                               goto repeat_in_this_group;
152                 }
153 -
154                 /*
155                  * This case is possible in concurrent environment.  It is very
156                  * rare.  We cannot repeat the find_group_xxx() call because
157 @@ -548,7 +575,6 @@
158         }
159         err = -ENOSPC;
160         goto out;
161 -
162  got:
163          BUFFER_TRACE(bitmap_bh, "get_undo_access");
164          err = ext3_journal_get_undo_access(handle, bitmap_bh, NULL);
165 @@ -567,6 +593,7 @@
166          if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
167                 J_ASSERT_BH(bitmap_bh, !ext3_test_bit(ino, bh2jh(bitmap_bh)->b_committed_data));
168         
169 +       J_ASSERT(ext3_group_allowed(sb, mode, group));  
170         ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
171         if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
172                 ext3_error (sb, "ext3_new_inode",
173 @@ -605,7 +632,7 @@
174                         mode |= S_ISGID;
175         } else
176                 inode->i_gid = current->fsgid;
177 -       inode->i_mode = mode;
178 +       inode->i_mode = mode & ~EXT3_S_INRESERVE;
179  
180         inode->i_ino = ino;
181         /* This is the optimal IO size (for stat), not the fs block size */
182 Index: linux-2.6.7/fs/ext3/super.c
183 ===================================================================
184 --- linux-2.6.7.orig/fs/ext3/super.c    2004-08-27 12:04:38.000000000 +0800
185 +++ linux-2.6.7/fs/ext3/super.c 2004-08-27 14:28:22.000000000 +0800
186 @@ -644,6 +644,7 @@
187         {Opt_iopen_nopriv,  "iopen_nopriv"},
188         {Opt_extents, "extents"},
189         {Opt_extdebug, "extdebug"},
190 +       {Opt_inrsv,    "inrsv"},
191         {Opt_err, NULL}
192  };
193  
194 @@ -929,6 +930,10 @@
195                 case Opt_extdebug:
196                         set_opt (sbi->s_mount_opt, EXTDEBUG);
197                         break;
198 +               case Opt_inrsv:
199 +                       set_opt (sbi->s_mount_opt, INRESERVE);
200 +                       J_ASSERT((EXT3_S_INRESERVE & S_IALLUGO) == 0);
201 +                       break;  
202                 default:
203                         printk (KERN_ERR
204                                 "EXT3-fs: Unrecognized mount option \"%s\" "
205 Index: linux-2.6.7/fs/ext3/namei.c
206 ===================================================================
207 --- linux-2.6.7.orig/fs/ext3/namei.c    2004-08-27 13:03:21.000000000 +0800
208 +++ linux-2.6.7/fs/ext3/namei.c 2004-08-27 14:36:09.000000000 +0800
209 @@ -1701,6 +1701,36 @@
210         return err;
211  }
212  
213 +static int ext3_create_it (struct inode * dir, struct dentry * dentry, int mode,
214 +                               struct lookup_intent *it) /* it may be NULL */
215 +{ /* Allocate an inode with file operations and add it under dentry; returns 0 or -errno. */
216 +       handle_t *handle; 
217 +       struct inode * inode;
218 +       int err;
219 +
220 +       handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
221 +                                       EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
222 +       if (IS_ERR(handle)) {
223 +               return PTR_ERR(handle);
224 +       }
225 +
226 +       if (IS_SYNC(dir))
227 +               handle->h_sync = 1;
228 +
229 +       if (it && it->it_flags & O_INRESERVE) /* & binds tighter than &&: flag tested only if it != NULL */
230 +               mode |= EXT3_S_INRESERVE; /* private mode bit; presumably consumed by the inode allocator - verify */
231 +       inode = ext3_new_inode_wantedi (handle, dir, mode, dentry);
232 +       err = PTR_ERR(inode); /* kept only on failure; overwritten below on success */
233 +       if (!IS_ERR(inode)) {
234 +               inode->i_op = &ext3_file_inode_operations;
235 +               inode->i_fop = &ext3_file_operations;
236 +               ext3_set_aops(inode);
237 +               err = ext3_add_nondir(handle, dentry, inode);
238 +       }
239 +       ext3_journal_stop(handle, dir); /* NOTE(review): mainline 2.6 ext3_journal_stop() takes only the handle - confirm this form builds */
240 +       return err;
241 +}
242 +
243  static int ext3_mknod (struct inode * dir, struct dentry *dentry,
244                         int mode, dev_t rdev)
245  {
246 @@ -2462,6 +2492,7 @@
247   */
248  struct inode_operations ext3_dir_inode_operations = {
249         .create         = ext3_create,
250 +       .create_it      = ext3_create_it,               /* BKL held */
251         .lookup         = ext3_lookup,
252         .link           = ext3_link,
253         .unlink         = ext3_unlink,
254 Index: linux-2.6.7/include/asm-i386/fcntl.h
255 ===================================================================
256 --- linux-2.6.7.orig/include/asm-i386/fcntl.h   2004-06-16 13:19:35.000000000 +0800
257 +++ linux-2.6.7/include/asm-i386/fcntl.h        2004-08-27 14:28:22.000000000 +0800
258 @@ -20,6 +20,7 @@
259  #define O_LARGEFILE    0100000
260  #define O_DIRECTORY    0200000 /* must be a directory */
261  #define O_NOFOLLOW     0400000 /* don't follow links */
262 +#define O_INRESERVE    01000000 /* allocate inodes in reserved space */
263  
264  #define F_DUPFD                0       /* dup */
265  #define F_GETFD                1       /* get close_on_exec */
266 Index: linux-2.6.7/include/linux/ext3_fs.h
267 ===================================================================
268 --- linux-2.6.7.orig/include/linux/ext3_fs.h    2004-08-27 13:03:21.000000000 +0800
269 +++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-27 14:28:22.000000000 +0800
270 @@ -343,6 +343,7 @@
271  #define EXT3_MOUNT_EXTDEBUG            0x20000 /* Extents debug */
272  #define EXT3_MOUNT_IOPEN               0x40000 /* Allow access via iopen */
273  #define EXT3_MOUNT_IOPEN_NOPRIV                0x80000 /* Make iopen world-readable */
274 +#define EXT3_MOUNT_INRESERVE           0x400000/* reserve one group for O_INRESERVE */
275  
276  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
277  #ifndef clear_opt
278 @@ -493,6 +494,8 @@
279  
280  #define EXT3_GOOD_OLD_INODE_SIZE 128
281  
282 +#define EXT3_S_INRESERVE       01000000
283 +
284  /*
285   * Feature set definitions
286   */