Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / quota-deadlock-on-pagelock-core.patch
1
2 From: Jan Kara <jack@suse.cz>
3
4 The four patches in this series fix quota deadlocks on PageLock (the
5 problem was lock inversion on PageLock and transaction start - quota code
6 needed to first start a transaction and then write the data which subsequently
7 needed acquisition of PageLock while the standard ordering - PageLock first
8 and transaction start later - was used e.g.  by pdflush).  They implement a
9 new way of quota access to disk: Every filesystem that would like to implement
10 quotas now has to provide quota_read() and quota_write() functions.  These
11 functions must obey quota lock ordering (in particular they should not take
12 PageLock inside a transaction).
13
14 The first patch implements the changes in the quota core, the other three
15 patches implement needed functions in ext2, ext3 and reiserfs.  The patch for
16 reiserfs also fixes several other lock inversion problems (similar to those ext3
17 had) and implements the journaled quota functionality (which comes almost for
18 free after the locking fixes...).
19
20 The quota core patch makes quota support in other filesystems (except XFS,
21 which implements everything on its own ;)) non-functional (quotaon() will refuse
22 to turn on quotas on them).  Once the patches get reasonably wide testing and
23 it seems that no major changes are needed, I can also make fixes for
24 the other filesystems (JFS, UDF, UFS).
25
26 This patch:
27
28 The patch implements the new way of quota io in the quota core.  Every
29 filesystem wanting to support quotas has to provide functions quota_read()
30 and quota_write() obeying quota locking rules.  As the writes and reads
31 bypass the pagecache there is some ugly stuff ensuring that userspace can
32 see all the data after quotaoff() (or Q_SYNC quotactl).  In the future I plan
33 to make quota files inaccessible from userspace (with the exception of
34 quotacheck(8), which will take care of the cache flushing and such stuff
35 itself) so that this synchronization stuff can be removed...
36
37 The rewrite of the quota core. Quota no longer uses the filesystem read() and
38 write() functions, to avoid possible deadlocks on PageLock. From now on every
39 filesystem supporting quotas must provide functions quota_read() and
40 quota_write() which obey the quota locking rules (e.g. they cannot acquire the
41 PageLock).
42
43 Signed-off-by: Jan Kara <jack@suse.cz>
44 Signed-off-by: Andrew Morton <akpm@osdl.org>
45 ---
46
47  25-akpm/fs/dquot.c               |  162 +++++++++++++--------------
48  25-akpm/fs/quota.c               |   45 +++++++
49  25-akpm/fs/quota_v1.c            |   62 ++--------
50  25-akpm/fs/quota_v2.c            |  227 +++++++++++++++++----------------------
51  25-akpm/include/linux/fs.h       |    3 
52  25-akpm/include/linux/quota.h    |    2 
53  25-akpm/include/linux/security.h |    8 -
54  25-akpm/security/dummy.c         |    2 
55  25-akpm/security/selinux/hooks.c |    4 
56  9 files changed, 247 insertions(+), 268 deletions(-)
57
58 diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c
59 --- 25/fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core  2004-12-03 20:56:04.293107536 -0800
60 +++ 25-akpm/fs/dquot.c  2004-12-03 20:56:04.312104648 -0800
61 @@ -49,7 +49,7 @@
62   *             New SMP locking.
63   *             Jan Kara, <jack@suse.cz>, 10/2002
64   *
65 - *             Added journalled quota support
66 + *             Added journalled quota support, fix lock inversion problems
67   *             Jan Kara, <jack@suse.cz>, 2003,2004
68   *
69   * (C) Copyright 1994 - 1997 Marco van Wieringen 
70 @@ -75,7 +75,8 @@
71  #include <linux/proc_fs.h>
72  #include <linux/security.h>
73  #include <linux/kmod.h>
74 -#include <linux/pagemap.h>
75 +#include <linux/namei.h>
76 +#include <linux/buffer_head.h>
77  
78  #include <asm/uaccess.h>
79  
80 @@ -114,7 +115,7 @@
81   * operations on dquots don't hold dq_lock as they copy data under dq_data_lock
82   * spinlock to internal buffers before writing.
83   *
84 - * Lock ordering (including related VFS locks) is following:
85 + * Lock ordering (including related VFS locks) is the following:
86   *   i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
87   *   > dquot->dq_lock > dqio_sem
88   * i_sem on quota files is special (it's below dqio_sem)
89 @@ -183,8 +184,7 @@ static void put_quota_format(struct quot
90   * on all three lists, depending on its current state.
91   *
92   * All dquots are placed to the end of inuse_list when first created, and this
93 - * list is used for the sync and invalidate operations, which must look
94 - * at every dquot.
95 + * list is used for invalidate operation, which must look at every dquot.
96   *
97   * Unused dquots (dq_count == 0) are added to the free_dquots list when freed,
98   * and this list is searched whenever we need an available dquot.  Dquots are
99 @@ -1314,10 +1314,12 @@ int vfs_quota_off(struct super_block *sb
100  {
101         int cnt;
102         struct quota_info *dqopt = sb_dqopt(sb);
103 +       struct inode *toput[MAXQUOTAS];
104  
105         /* We need to serialize quota_off() for device */
106         down(&dqopt->dqonoff_sem);
107         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
108 +               toput[cnt] = NULL;
109                 if (type != -1 && cnt != type)
110                         continue;
111                 if (!sb_has_quota_enabled(sb, cnt))
112 @@ -1337,7 +1339,7 @@ int vfs_quota_off(struct super_block *sb
113                         dqopt->ops[cnt]->free_file_info(sb, cnt);
114                 put_quota_format(dqopt->info[cnt].dqi_format);
115  
116 -               fput(dqopt->files[cnt]);
117 +               toput[cnt] = dqopt->files[cnt];
118                 dqopt->files[cnt] = NULL;
119                 dqopt->info[cnt].dqi_flags = 0;
120                 dqopt->info[cnt].dqi_igrace = 0;
121 @@ -1345,6 +1347,26 @@ int vfs_quota_off(struct super_block *sb
122                 dqopt->ops[cnt] = NULL;
123         }
124         up(&dqopt->dqonoff_sem);
125 +       /* Sync the superblock so that buffers with quota data are written to
126 +         * disk (and so userspace sees correct data afterwards) */
127 +       if (sb->s_op->sync_fs)
128 +               sb->s_op->sync_fs(sb, 1);
129 +       sync_blockdev(sb->s_bdev);
130 +       /* Now the quota files are just ordinary files and we can set the
131 +        * inode flags back. Moreover we discard the pagecache so that
132 +        * userspace sees the writes we did bypassing the pagecache. We
133 +        * must also discard the blockdev buffers so that we see the
134 +        * changes done by userspace on the next quotaon() */
135 +       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
136 +               if (toput[cnt]) {
137 +                       down(&toput[cnt]->i_sem);
138 +                       toput[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA);
139 +                       truncate_inode_pages(&toput[cnt]->i_data, 0);
140 +                       up(&toput[cnt]->i_sem);
141 +                       mark_inode_dirty(toput[cnt]);
142 +                       iput(toput[cnt]);
143 +               }
144 +       invalidate_bdev(sb->s_bdev, 0);
145         return 0;
146  }
147  
148 @@ -1352,68 +1374,56 @@ int vfs_quota_off(struct super_block *sb
149   *     Turn quotas on on a device
150   */
151  
152 -/* Helper function when we already have file open */
153 -static int vfs_quota_on_file(struct file *f, int type, int format_id)
154 +/* Helper function when we already have the inode */
155 +static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
156  {
157         struct quota_format_type *fmt = find_quota_format(format_id);
158 -       struct inode *inode;
159 -       struct super_block *sb = f->f_dentry->d_sb;
160 +       struct super_block *sb = inode->i_sb;
161         struct quota_info *dqopt = sb_dqopt(sb);
162 -       struct dquot *to_drop[MAXQUOTAS];
163 -       int error, cnt;
164 -       unsigned int oldflags = -1;
165 +       int error;
166 +       int oldflags = -1;
167  
168         if (!fmt)
169                 return -ESRCH;
170 -       error = -EIO;
171 -       if (!f->f_op || !f->f_op->read || !f->f_op->write)
172 +       if (!S_ISREG(inode->i_mode)) {
173 +               error = -EACCES;
174                 goto out_fmt;
175 -       inode = f->f_dentry->d_inode;
176 -       error = -EACCES;
177 -       if (!S_ISREG(inode->i_mode))
178 +       }
179 +       if (IS_RDONLY(inode)) {
180 +               error = -EROFS;
181 +               goto out_fmt;
182 +       }
183 +       if (!sb->s_op->quota_write || !sb->s_op->quota_read) {
184 +               error = -EINVAL;
185                 goto out_fmt;
186 +       }
187  
188 +       /* As we bypass the pagecache we must now flush the inode so that
189 +        * we see all the changes from userspace... */
190 +       write_inode_now(inode, 1);
191 +       /* And now flush the block cache so that kernel sees the changes */
192 +       invalidate_bdev(sb->s_bdev, 0);
193         down(&inode->i_sem);
194         down(&dqopt->dqonoff_sem);
195         if (sb_has_quota_enabled(sb, type)) {
196 -               up(&inode->i_sem);
197                 error = -EBUSY;
198                 goto out_lock;
199         }
200         /* We don't want quota and atime on quota files (deadlocks possible)
201 -        * We also need to set GFP mask differently because we cannot recurse
202 -        * into filesystem when allocating page for quota inode */
203 +        * Also nobody should write to the file - we use special IO operations
204 +        * which ignore the immutable bit. */
205         down_write(&dqopt->dqptr_sem);
206 -       oldflags = inode->i_flags & (S_NOATIME | S_NOQUOTA);
207 -       inode->i_flags |= S_NOQUOTA | S_NOATIME;
208 +       oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
209 +       inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
210         up_write(&dqopt->dqptr_sem);
211 -       up(&inode->i_sem);
212  
213 -       dqopt->files[type] = f;
214 +       error = -EIO;
215 +       dqopt->files[type] = igrab(inode);
216 +       if (!dqopt->files[type])
217 +               goto out_lock;
218         error = -EINVAL;
219         if (!fmt->qf_ops->check_quota_file(sb, type))
220                 goto out_file_init;
221 -       /*
222 -        * We write to quota files deep within filesystem code.  We don't want
223 -        * the VFS to reenter filesystem code when it tries to allocate a
224 -        * pagecache page for the quota file write.  So clear __GFP_FS in
225 -        * the quota file's allocation flags.
226 -        */
227 -       mapping_set_gfp_mask(inode->i_mapping,
228 -               mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
229 -
230 -       down_write(&dqopt->dqptr_sem);
231 -       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
232 -               to_drop[cnt] = inode->i_dquot[cnt];
233 -               inode->i_dquot[cnt] = NODQUOT;
234 -       }
235 -       up_write(&dqopt->dqptr_sem);
236 -       /* We must put dquots outside of dqptr_sem because we may need to
237 -        * start transaction for dquot_release() */
238 -       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
239 -               if (to_drop[cnt])
240 -                       dqput(to_drop[cnt]);
241 -       }
242  
243         dqopt->ops[type] = fmt->qf_ops;
244         dqopt->info[type].dqi_format = fmt;
245 @@ -1424,6 +1434,7 @@ static int vfs_quota_on_file(struct file
246                 goto out_file_init;
247         }
248         up(&dqopt->dqio_sem);
249 +       up(&inode->i_sem);
250         set_enable_flags(dqopt, type);
251  
252         add_dquot_ref(sb, type);
253 @@ -1433,19 +1444,18 @@ static int vfs_quota_on_file(struct file
254  
255  out_file_init:
256         dqopt->files[type] = NULL;
257 +       iput(inode);
258  out_lock:
259         up(&dqopt->dqonoff_sem);
260         if (oldflags != -1) {
261 -               down(&inode->i_sem);
262                 down_write(&dqopt->dqptr_sem);
263 -               /* Reset the NOATIME flag back. I know it could change in the
264 -                * mean time but playing with NOATIME flags on a quota file is
265 -                * never a good idea */
266 -               inode->i_flags &= ~(S_NOATIME | S_NOQUOTA);
267 +               /* Set the flags back (in the case of accidental quotaon()
268 +                * on a wrong file we don't want to mess up the flags) */
269 +               inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
270                 inode->i_flags |= oldflags;
271                 up_write(&dqopt->dqptr_sem);
272 -               up(&inode->i_sem);
273         }
274 +       up(&inode->i_sem);
275  out_fmt:
276         put_quota_format(fmt);
277  
278 @@ -1455,47 +1465,37 @@ out_fmt:
279  /* Actual function called from quotactl() */
280  int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
281  {
282 -       struct file *f;
283 +       struct nameidata nd;
284         int error;
285  
286 -       f = filp_open(path, O_RDWR, 0600);
287 -       if (IS_ERR(f))
288 -               return PTR_ERR(f);
289 -       error = security_quota_on(f);
290 +       error = path_lookup(path, LOOKUP_FOLLOW, &nd);
291 +       if (error < 0)
292 +               return error;
293 +       error = security_quota_on(nd.dentry);
294         if (error)
295 -               goto out_f;
296 -       error = vfs_quota_on_file(f, type, format_id);
297 -       if (!error)
298 -               return 0;
299 -out_f:
300 -       filp_close(f, NULL);
301 +               goto out_path;
302 +       /* Quota file not on the same filesystem? */
303 +       if (nd.mnt->mnt_sb != sb)
304 +               error = -EXDEV;
305 +       else
306 +               error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
307 +out_path:
308 +       path_release(&nd);
309         return error;
310  }
311  
312  /*
313 - * Function used by filesystems when filp_open() would fail (filesystem is
314 - * being mounted now). We will use a private file structure. Caller is
315 - * responsible that it's IO functions won't need vfsmnt structure or
316 - * some dentry tricks...
317 + * This function is used when filesystem needs to initialize quotas
318 + * during mount time.
319   */
320  int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry)
321  {
322 -       struct file *f;
323         int error;
324  
325 -       dget(dentry);   /* Get a reference for struct file */
326 -       f = dentry_open(dentry, NULL, O_RDWR);
327 -       if (IS_ERR(f)) {
328 -               error = PTR_ERR(f);
329 -               goto out_dentry;
330 -       }
331 -       error = vfs_quota_on_file(f, type, format_id);
332 -       if (!error)
333 -               return 0;
334 -       fput(f);
335 -out_dentry:
336 -       dput(dentry);
337 -       return error;
338 +       error = security_quota_on(dentry);
339 +       if (error)
340 +               return error;
341 +       return vfs_quota_on_inode(dentry->d_inode, type, format_id);
342  }
343  
344  /* Generic routine for getting common part of quota structure */
345 diff -puN fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota.c
346 --- 25/fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core  2004-12-03 20:56:04.295107232 -0800
347 +++ 25-akpm/fs/quota.c  2004-12-03 20:56:04.313104496 -0800
348 @@ -13,6 +13,8 @@
349  #include <linux/kernel.h>
350  #include <linux/smp_lock.h>
351  #include <linux/security.h>
352 +#include <linux/syscalls.h>
353 +#include <linux/buffer_head.h>
354
355  /* Check validity of quotactl */
356  static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
357 @@ -135,16 +136,54 @@ restart:
358         return NULL;
359  }
360  
361 +void quota_sync_sb(struct super_block *sb, int type)
362 +{
363 +       int cnt;
364 +       struct inode *discard[MAXQUOTAS];
365 +
366 +       sb->s_qcop->quota_sync(sb, type);
367 +       /* This is not very clever (and fast) but currently I don't know about
368 +        * any other simple way of getting quota data to disk and we must get
369 +        * them there for userspace to be visible... */
370 +       if (sb->s_op->sync_fs)
371 +               sb->s_op->sync_fs(sb, 1);
372 +       sync_blockdev(sb->s_bdev);
373 +
374 +       /* Now when everything is written we can discard the pagecache so
375 +        * that userspace sees the changes. We need i_sem and so we could
376 +        * not do it inside dqonoff_sem. Moreover we need to be careful
377 +        * about races with quotaoff() (that is the reason why we have own
378 +        * reference to inode). */
379 +       down(&sb_dqopt(sb)->dqonoff_sem);
380 +       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
381 +               discard[cnt] = NULL;
382 +               if (type != -1 && cnt != type)
383 +                       continue;
384 +               if (!sb_has_quota_enabled(sb, cnt))
385 +                       continue;
386 +               discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]);
387 +       }
388 +       up(&sb_dqopt(sb)->dqonoff_sem);
389 +       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
390 +               if (discard[cnt]) {
391 +                       down(&discard[cnt]->i_sem);
392 +                       truncate_inode_pages(&discard[cnt]->i_data, 0);
393 +                       up(&discard[cnt]->i_sem);
394 +                       iput(discard[cnt]);
395 +               }
396 +       }
397 +}
398 +
399  void sync_dquots(struct super_block *sb, int type)
400  {
401         if (sb) {
402                 if (sb->s_qcop->quota_sync)
403 -                       sb->s_qcop->quota_sync(sb, type);
404 +                       quota_sync_sb(sb, type);
405         }
406         else {
407 -               while ((sb = get_super_to_sync(type)) != 0) {
408 +               while ((sb = get_super_to_sync(type)) != NULL) {
409                         if (sb->s_qcop->quota_sync)
410 -                               sb->s_qcop->quota_sync(sb, type);
411 +                               quota_sync_sb(sb, type);
412                         drop_super(sb);
413                 }
414         }
415 diff -puN fs/quota_v1.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v1.c
416 --- 25/fs/quota_v1.c~fix-of-quota-deadlock-on-pagelock-quota-core       2004-12-03 20:56:04.296107080 -0800
417 +++ 25-akpm/fs/quota_v1.c       2004-12-03 20:56:04.314104344 -0800
418 @@ -7,7 +7,6 @@
419  #include <linux/init.h>
420  #include <linux/module.h>
421  
422 -#include <asm/uaccess.h>
423  #include <asm/byteorder.h>
424  
425  MODULE_AUTHOR("Jan Kara");
426 @@ -41,23 +40,14 @@ static void v1_mem2disk_dqblk(struct v1_
427  static int v1_read_dqblk(struct dquot *dquot)
428  {
429         int type = dquot->dq_type;
430 -       struct file *filp;
431 -       mm_segment_t fs;
432 -       loff_t offset;
433         struct v1_disk_dqblk dqblk;
434  
435 -       filp = sb_dqopt(dquot->dq_sb)->files[type];
436 -       if (filp == (struct file *)NULL)
437 +       if (!sb_dqopt(dquot->dq_sb)->files[type])
438                 return -EINVAL;
439  
440 -       /* Now we are sure filp is valid */
441 -       offset = v1_dqoff(dquot->dq_id);
442         /* Set structure to 0s in case read fails/is after end of file */
443         memset(&dqblk, 0, sizeof(struct v1_disk_dqblk));
444 -       fs = get_fs();
445 -       set_fs(KERNEL_DS);
446 -       filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset);
447 -       set_fs(fs);
448 +       dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
449  
450         v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk);
451         if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 &&
452 @@ -71,26 +61,18 @@ static int v1_read_dqblk(struct dquot *d
453  static int v1_commit_dqblk(struct dquot *dquot)
454  {
455         short type = dquot->dq_type;
456 -       struct file *filp;
457 -       mm_segment_t fs;
458 -       loff_t offset;
459         ssize_t ret;
460         struct v1_disk_dqblk dqblk;
461  
462 -       filp = sb_dqopt(dquot->dq_sb)->files[type];
463 -       offset = v1_dqoff(dquot->dq_id);
464 -       fs = get_fs();
465 -       set_fs(KERNEL_DS);
466 -
467         v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
468         if (dquot->dq_id == 0) {
469                 dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
470                 dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
471         }
472         ret = 0;
473 -       if (filp)
474 -               ret = filp->f_op->write(filp, (char *)&dqblk,
475 -                                       sizeof(struct v1_disk_dqblk), &offset);
476 +       if (sb_dqopt(dquot->dq_sb)->files[type])
477 +               ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, (char *)&dqblk,
478 +                                       sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
479         if (ret != sizeof(struct v1_disk_dqblk)) {
480                 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
481                         dquot->dq_sb->s_id);
482 @@ -101,7 +83,6 @@ static int v1_commit_dqblk(struct dquot 
483         ret = 0;
484  
485  out:
486 -       set_fs(fs);
487         dqstats.writes++;
488  
489         return ret;
490 @@ -121,14 +102,11 @@ struct v2_disk_dqheader {
491  
492  static int v1_check_quota_file(struct super_block *sb, int type)
493  {
494 -       struct file *f = sb_dqopt(sb)->files[type];
495 -       struct inode *inode = f->f_dentry->d_inode;
496 +       struct inode *inode = sb_dqopt(sb)->files[type];
497         ulong blocks;
498         size_t off; 
499         struct v2_disk_dqheader dqhead;
500 -       mm_segment_t fs;
501         ssize_t size;
502 -       loff_t offset = 0;
503         loff_t isize;
504         static const uint quota_magics[] = V2_INITQMAGICS;
505  
506 @@ -140,10 +118,7 @@ static int v1_check_quota_file(struct su
507         if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk))
508                 return 0;
509         /* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */
510 -       fs = get_fs();
511 -       set_fs(KERNEL_DS);
512 -       size = f->f_op->read(f, (char *)&dqhead, sizeof(struct v2_disk_dqheader), &offset);
513 -       set_fs(fs);
514 +       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
515         if (size != sizeof(struct v2_disk_dqheader))
516                 return 1;       /* Probably not new format */
517         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type])
518 @@ -155,16 +130,10 @@ static int v1_check_quota_file(struct su
519  static int v1_read_file_info(struct super_block *sb, int type)
520  {
521         struct quota_info *dqopt = sb_dqopt(sb);
522 -       mm_segment_t fs;
523 -       loff_t offset;
524 -       struct file *filp = dqopt->files[type];
525         struct v1_disk_dqblk dqblk;
526         int ret;
527  
528 -       offset = v1_dqoff(0);
529 -       fs = get_fs();
530 -       set_fs(KERNEL_DS);
531 -       if ((ret = filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset)) != sizeof(struct v1_disk_dqblk)) {
532 +       if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
533                 if (ret >= 0)
534                         ret = -EIO;
535                 goto out;
536 @@ -173,38 +142,31 @@ static int v1_read_file_info(struct supe
537         dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
538         dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
539  out:
540 -       set_fs(fs);
541         return ret;
542  }
543  
544  static int v1_write_file_info(struct super_block *sb, int type)
545  {
546         struct quota_info *dqopt = sb_dqopt(sb);
547 -       mm_segment_t fs;
548 -       struct file *filp = dqopt->files[type];
549         struct v1_disk_dqblk dqblk;
550 -       loff_t offset;
551         int ret;
552  
553         dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY;
554 -       offset = v1_dqoff(0);
555 -       fs = get_fs();
556 -       set_fs(KERNEL_DS);
557 -       if ((ret = filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset)) != sizeof(struct v1_disk_dqblk)) {
558 +       if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk,
559 +           sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
560                 if (ret >= 0)
561                         ret = -EIO;
562                 goto out;
563         }
564         dqblk.dqb_itime = dqopt->info[type].dqi_igrace;
565         dqblk.dqb_btime = dqopt->info[type].dqi_bgrace;
566 -       offset = v1_dqoff(0);
567 -       ret = filp->f_op->write(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset);
568 +       ret = sb->s_op->quota_write(sb, type, (char *)&dqblk,
569 +             sizeof(struct v1_disk_dqblk), v1_dqoff(0));
570         if (ret == sizeof(struct v1_disk_dqblk))
571                 ret = 0;
572         else if (ret > 0)
573                 ret = -EIO;
574  out:
575 -       set_fs(fs);
576         return ret;
577  }
578  
579 diff -puN fs/quota_v2.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v2.c
580 --- 25/fs/quota_v2.c~fix-of-quota-deadlock-on-pagelock-quota-core       2004-12-03 20:56:04.297106928 -0800
581 +++ 25-akpm/fs/quota_v2.c       2004-12-03 20:56:04.318103736 -0800
582 @@ -13,7 +13,6 @@
583  #include <linux/slab.h>
584  
585  #include <asm/byteorder.h>
586 -#include <asm/uaccess.h>
587  
588  MODULE_AUTHOR("Jan Kara");
589  MODULE_DESCRIPTION("Quota format v2 support");
590 @@ -30,19 +29,15 @@ typedef char *dqbuf_t;
591  static int v2_check_quota_file(struct super_block *sb, int type)
592  {
593         struct v2_disk_dqheader dqhead;
594 -       struct file *f = sb_dqopt(sb)->files[type];
595 -       mm_segment_t fs;
596         ssize_t size;
597 -       loff_t offset = 0;
598         static const uint quota_magics[] = V2_INITQMAGICS;
599         static const uint quota_versions[] = V2_INITQVERSIONS;
600   
601 -       fs = get_fs();
602 -       set_fs(KERNEL_DS);
603 -       size = f->f_op->read(f, (char *)&dqhead, sizeof(struct v2_disk_dqheader), &offset);
604 -       set_fs(fs);
605 -       if (size != sizeof(struct v2_disk_dqheader))
606 +       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
607 +       if (size != sizeof(struct v2_disk_dqheader)) {
608 +               printk("failed read\n");
609                 return 0;
610 +       }
611         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
612             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
613                 return 0;
614 @@ -52,20 +47,15 @@ static int v2_check_quota_file(struct su
615  /* Read information header from quota file */
616  static int v2_read_file_info(struct super_block *sb, int type)
617  {
618 -       mm_segment_t fs;
619         struct v2_disk_dqinfo dinfo;
620         struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
621 -       struct file *f = sb_dqopt(sb)->files[type];
622         ssize_t size;
623 -       loff_t offset = V2_DQINFOOFF;
624  
625 -       fs = get_fs();
626 -       set_fs(KERNEL_DS);
627 -       size = f->f_op->read(f, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), &offset);
628 -       set_fs(fs);
629 +       size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
630 +              sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
631         if (size != sizeof(struct v2_disk_dqinfo)) {
632                 printk(KERN_WARNING "Can't read info structure on device %s.\n",
633 -                       f->f_dentry->d_sb->s_id);
634 +                       sb->s_id);
635                 return -1;
636         }
637         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
638 @@ -80,12 +70,9 @@ static int v2_read_file_info(struct supe
639  /* Write information header to quota file */
640  static int v2_write_file_info(struct super_block *sb, int type)
641  {
642 -       mm_segment_t fs;
643         struct v2_disk_dqinfo dinfo;
644         struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
645 -       struct file *f = sb_dqopt(sb)->files[type];
646         ssize_t size;
647 -       loff_t offset = V2_DQINFOOFF;
648  
649         spin_lock(&dq_data_lock);
650         info->dqi_flags &= ~DQF_INFO_DIRTY;
651 @@ -96,13 +83,11 @@ static int v2_write_file_info(struct sup
652         dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
653         dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
654         dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
655 -       fs = get_fs();
656 -       set_fs(KERNEL_DS);
657 -       size = f->f_op->write(f, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), &offset);
658 -       set_fs(fs);
659 +       size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
660 +              sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
661         if (size != sizeof(struct v2_disk_dqinfo)) {
662                 printk(KERN_WARNING "Can't write info structure on device %s.\n",
663 -                       f->f_dentry->d_sb->s_id);
664 +                       sb->s_id);
665                 return -1;
666         }
667         return 0;
668 @@ -146,39 +131,24 @@ static inline void freedqbuf(dqbuf_t buf
669         kfree(buf);
670  }
671  
672 -static ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
673 +static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
674  {
675 -       mm_segment_t fs;
676 -       ssize_t ret;
677 -       loff_t offset = blk<<V2_DQBLKSIZE_BITS;
678 -
679         memset(buf, 0, V2_DQBLKSIZE);
680 -       fs = get_fs();
681 -       set_fs(KERNEL_DS);
682 -       ret = filp->f_op->read(filp, (char *)buf, V2_DQBLKSIZE, &offset);
683 -       set_fs(fs);
684 -       return ret;
685 +       return sb->s_op->quota_read(sb, type, (char *)buf,
686 +              V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
687  }
688  
689 -static ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
690 +static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
691  {
692 -       mm_segment_t fs;
693 -       ssize_t ret;
694 -       loff_t offset = blk<<V2_DQBLKSIZE_BITS;
695 -
696 -       fs = get_fs();
697 -       set_fs(KERNEL_DS);
698 -       ret = filp->f_op->write(filp, (char *)buf, V2_DQBLKSIZE, &offset);
699 -       set_fs(fs);
700 -       return ret;
701 -
702 +       return sb->s_op->quota_write(sb, type, (char *)buf,
703 +              V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
704  }
705  
706  /* Remove empty block from list and return it */
707 -static int get_free_dqblk(struct file *filp, int type)
708 +static int get_free_dqblk(struct super_block *sb, int type)
709  {
710         dqbuf_t buf = getdqbuf();
711 -       struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
712 +       struct mem_dqinfo *info = sb_dqinfo(sb, type);
713         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
714         int ret, blk;
715  
716 @@ -186,17 +156,18 @@ static int get_free_dqblk(struct file *f
717                 return -ENOMEM;
718         if (info->u.v2_i.dqi_free_blk) {
719                 blk = info->u.v2_i.dqi_free_blk;
720 -               if ((ret = read_blk(filp, blk, buf)) < 0)
721 +               if ((ret = read_blk(sb, type, blk, buf)) < 0)
722                         goto out_buf;
723                 info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
724         }
725         else {
726                 memset(buf, 0, V2_DQBLKSIZE);
727 -               if ((ret = write_blk(filp, info->u.v2_i.dqi_blocks, buf)) < 0)  /* Assure block allocation... */
728 +               /* Assure block allocation... */
729 +               if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
730                         goto out_buf;
731                 blk = info->u.v2_i.dqi_blocks++;
732         }
733 -       mark_info_dirty(filp->f_dentry->d_sb, type);
734 +       mark_info_dirty(sb, type);
735         ret = blk;
736  out_buf:
737         freedqbuf(buf);
738 @@ -204,9 +175,9 @@ out_buf:
739  }
740  
741  /* Insert empty block to the list */
742 -static int put_free_dqblk(struct file *filp, int type, dqbuf_t buf, uint blk)
743 +static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
744  {
745 -       struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
746 +       struct mem_dqinfo *info = sb_dqinfo(sb, type);
747         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
748         int err;
749  
750 @@ -214,17 +185,18 @@ static int put_free_dqblk(struct file *f
751         dh->dqdh_prev_free = cpu_to_le32(0);
752         dh->dqdh_entries = cpu_to_le16(0);
753         info->u.v2_i.dqi_free_blk = blk;
754 -       mark_info_dirty(filp->f_dentry->d_sb, type);
755 -       if ((err = write_blk(filp, blk, buf)) < 0)      /* Some strange block. We had better leave it... */
756 +       mark_info_dirty(sb, type);
757 +       /* Some strange block. We had better leave it... */
758 +       if ((err = write_blk(sb, type, blk, buf)) < 0)
759                 return err;
760         return 0;
761  }
762  
763  /* Remove given block from the list of blocks with free entries */
764 -static int remove_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk)
765 +static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
766  {
767         dqbuf_t tmpbuf = getdqbuf();
768 -       struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
769 +       struct mem_dqinfo *info = sb_dqinfo(sb, type);
770         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
771         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
772         int err;
773 @@ -232,26 +204,27 @@ static int remove_free_dqentry(struct fi
774         if (!tmpbuf)
775                 return -ENOMEM;
776         if (nextblk) {
777 -               if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
778 +               if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
779                         goto out_buf;
780                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
781 -               if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
782 +               if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
783                         goto out_buf;
784         }
785         if (prevblk) {
786 -               if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
787 +               if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
788                         goto out_buf;
789                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
790 -               if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
791 +               if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
792                         goto out_buf;
793         }
794         else {
795                 info->u.v2_i.dqi_free_entry = nextblk;
796 -               mark_info_dirty(filp->f_dentry->d_sb, type);
797 +               mark_info_dirty(sb, type);
798         }
799         freedqbuf(tmpbuf);
800         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
801 -       if (write_blk(filp, blk, buf) < 0)      /* No matter whether write succeeds block is out of list */
802 +       /* No matter whether write succeeds block is out of list */
803 +       if (write_blk(sb, type, blk, buf) < 0)
804                 printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
805         return 0;
806  out_buf:
807 @@ -260,10 +233,10 @@ out_buf:
808  }
809  
810  /* Insert given block to the beginning of list with free entries */
811 -static int insert_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk)
812 +static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
813  {
814         dqbuf_t tmpbuf = getdqbuf();
815 -       struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
816 +       struct mem_dqinfo *info = sb_dqinfo(sb, type);
817         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
818         int err;
819  
820 @@ -271,18 +244,18 @@ static int insert_free_dqentry(struct fi
821                 return -ENOMEM;
822         dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
823         dh->dqdh_prev_free = cpu_to_le32(0);
824 -       if ((err = write_blk(filp, blk, buf)) < 0)
825 +       if ((err = write_blk(sb, type, blk, buf)) < 0)
826                 goto out_buf;
827         if (info->u.v2_i.dqi_free_entry) {
828 -               if ((err = read_blk(filp, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
829 +               if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
830                         goto out_buf;
831                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
832 -               if ((err = write_blk(filp, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
833 +               if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
834                         goto out_buf;
835         }
836         freedqbuf(tmpbuf);
837         info->u.v2_i.dqi_free_entry = blk;
838 -       mark_info_dirty(filp->f_dentry->d_sb, type);
839 +       mark_info_dirty(sb, type);
840         return 0;
841  out_buf:
842         freedqbuf(tmpbuf);
843 @@ -292,8 +265,8 @@ out_buf:
844  /* Find space for dquot */
845  static uint find_free_dqentry(struct dquot *dquot, int *err)
846  {
847 -       struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
848 -       struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info+dquot->dq_type;
849 +       struct super_block *sb = dquot->dq_sb;
850 +       struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
851         uint blk, i;
852         struct v2_disk_dqdbheader *dh;
853         struct v2_disk_dqblk *ddquot;
854 @@ -309,22 +282,23 @@ static uint find_free_dqentry(struct dqu
855         ddquot = GETENTRIES(buf);
856         if (info->u.v2_i.dqi_free_entry) {
857                 blk = info->u.v2_i.dqi_free_entry;
858 -               if ((*err = read_blk(filp, blk, buf)) < 0)
859 +               if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
860                         goto out_buf;
861         }
862         else {
863 -               blk = get_free_dqblk(filp, dquot->dq_type);
864 +               blk = get_free_dqblk(sb, dquot->dq_type);
865                 if ((int)blk < 0) {
866                         *err = blk;
867                         freedqbuf(buf);
868                         return 0;
869                 }
870                 memset(buf, 0, V2_DQBLKSIZE);
871 -               info->u.v2_i.dqi_free_entry = blk;      /* This is enough as block is already zeroed and entry list is empty... */
872 -               mark_info_dirty(dquot->dq_sb, dquot->dq_type);
873 +               /* This is enough as block is already zeroed and entry list is empty... */
874 +               info->u.v2_i.dqi_free_entry = blk;
875 +               mark_info_dirty(sb, dquot->dq_type);
876         }
877         if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)   /* Block will be full? */
878 -               if ((*err = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) {
879 +               if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
880                         printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
881                         goto out_buf;
882                 }
883 @@ -339,7 +313,7 @@ static uint find_free_dqentry(struct dqu
884                 goto out_buf;
885         }
886  #endif
887 -       if ((*err = write_blk(filp, blk, buf)) < 0) {
888 +       if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
889                 printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
890                 goto out_buf;
891         }
892 @@ -354,7 +328,7 @@ out_buf:
893  /* Insert reference to structure into the trie */
894  static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
895  {
896 -       struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
897 +       struct super_block *sb = dquot->dq_sb;
898         dqbuf_t buf;
899         int ret = 0, newson = 0, newact = 0;
900         __le32 *ref;
901 @@ -363,7 +337,7 @@ static int do_insert_tree(struct dquot *
902         if (!(buf = getdqbuf()))
903                 return -ENOMEM;
904         if (!*treeblk) {
905 -               ret = get_free_dqblk(filp, dquot->dq_type);
906 +               ret = get_free_dqblk(sb, dquot->dq_type);
907                 if (ret < 0)
908                         goto out_buf;
909                 *treeblk = ret;
910 @@ -371,7 +345,7 @@ static int do_insert_tree(struct dquot *
911                 newact = 1;
912         }
913         else {
914 -               if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
915 +               if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
916                         printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
917                         goto out_buf;
918                 }
919 @@ -394,10 +368,10 @@ static int do_insert_tree(struct dquot *
920                 ret = do_insert_tree(dquot, &newblk, depth+1);
921         if (newson && ret >= 0) {
922                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
923 -               ret = write_blk(filp, *treeblk, buf);
924 +               ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
925         }
926         else if (newact && ret < 0)
927 -               put_free_dqblk(filp, dquot->dq_type, buf, *treeblk);
928 +               put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
929  out_buf:
930         freedqbuf(buf);
931         return ret;
932 @@ -416,20 +390,15 @@ static inline int dq_insert_tree(struct 
933  static int v2_write_dquot(struct dquot *dquot)
934  {
935         int type = dquot->dq_type;
936 -       struct file *filp;
937 -       mm_segment_t fs;
938 -       loff_t offset;
939         ssize_t ret;
940         struct v2_disk_dqblk ddquot, empty;
941  
942         /* dq_off is guarded by dqio_sem */
943         if (!dquot->dq_off)
944                 if ((ret = dq_insert_tree(dquot)) < 0) {
945 -                       printk(KERN_ERR "VFS: Error %Zd occurred while creating quota.\n", ret);
946 +                       printk(KERN_ERR "VFS: Error %Zd occurred while creating quota.\n", ret);
947                         return ret;
948                 }
949 -       filp = sb_dqopt(dquot->dq_sb)->files[type];
950 -       offset = dquot->dq_off;
951         spin_lock(&dq_data_lock);
952         mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
953         /* Argh... We may need to write structure full of zeroes but that would be
954 @@ -439,10 +408,8 @@ static int v2_write_dquot(struct dquot *
955         if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
956                 ddquot.dqb_itime = cpu_to_le64(1);
957         spin_unlock(&dq_data_lock);
958 -       fs = get_fs();
959 -       set_fs(KERNEL_DS);
960 -       ret = filp->f_op->write(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset);
961 -       set_fs(fs);
962 +       ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
963 +             (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
964         if (ret != sizeof(struct v2_disk_dqblk)) {
965                 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
966                 if (ret >= 0)
967 @@ -458,7 +425,8 @@ static int v2_write_dquot(struct dquot *
968  /* Free dquot entry in data block */
969  static int free_dqentry(struct dquot *dquot, uint blk)
970  {
971 -       struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
972 +       struct super_block *sb = dquot->dq_sb;
973 +       int type = dquot->dq_type;
974         struct v2_disk_dqdbheader *dh;
975         dqbuf_t buf = getdqbuf();
976         int ret = 0;
977 @@ -466,34 +434,39 @@ static int free_dqentry(struct dquot *dq
978         if (!buf)
979                 return -ENOMEM;
980         if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) {
981 -               printk(KERN_ERR "VFS: Quota structure has offset to other block (%u) than it should (%u).\n", blk, (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
982 +               printk(KERN_ERR "VFS: Quota structure has offset to other "
983 +                 "block (%u) than it should (%u).\n", blk,
984 +                 (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
985                 goto out_buf;
986         }
987 -       if ((ret = read_blk(filp, blk, buf)) < 0) {
988 +       if ((ret = read_blk(sb, type, blk, buf)) < 0) {
989                 printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
990                 goto out_buf;
991         }
992         dh = (struct v2_disk_dqdbheader *)buf;
993         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1);
994         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
995 -               if ((ret = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0 ||
996 -                   (ret = put_free_dqblk(filp, dquot->dq_type, buf, blk)) < 0) {
997 -                       printk(KERN_ERR "VFS: Can't move quota data block (%u) to free list.\n", blk);
998 +               if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
999 +                   (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
1000 +                       printk(KERN_ERR "VFS: Can't move quota data block (%u) "
1001 +                         "to free list.\n", blk);
1002                         goto out_buf;
1003                 }
1004         }
1005         else {
1006 -               memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, sizeof(struct v2_disk_dqblk));
1007 +               memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
1008 +                 sizeof(struct v2_disk_dqblk));
1009                 if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
1010                         /* Insert will write block itself */
1011 -                       if ((ret = insert_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) {
1012 +                       if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
1013                                 printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
1014                                 goto out_buf;
1015                         }
1016                 }
1017                 else
1018 -                       if ((ret = write_blk(filp, blk, buf)) < 0) {
1019 -                               printk(KERN_ERR "VFS: Can't write quota data block %u\n", blk);
1020 +                       if ((ret = write_blk(sb, type, blk, buf)) < 0) {
1021 +                               printk(KERN_ERR "VFS: Can't write quota data "
1022 +                                 "block %u\n", blk);
1023                                 goto out_buf;
1024                         }
1025         }
1026 @@ -506,7 +479,8 @@ out_buf:
1027  /* Remove reference to dquot from tree */
1028  static int remove_tree(struct dquot *dquot, uint *blk, int depth)
1029  {
1030 -       struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
1031 +       struct super_block *sb = dquot->dq_sb;
1032 +       int type = dquot->dq_type;
1033         dqbuf_t buf = getdqbuf();
1034         int ret = 0;
1035         uint newblk;
1036 @@ -514,7 +488,7 @@ static int remove_tree(struct dquot *dqu
1037         
1038         if (!buf)
1039                 return -ENOMEM;
1040 -       if ((ret = read_blk(filp, *blk, buf)) < 0) {
1041 +       if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
1042                 printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
1043                 goto out_buf;
1044         }
1045 @@ -530,12 +504,13 @@ static int remove_tree(struct dquot *dqu
1046                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
1047                 for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++);  /* Block got empty? */
1048                 if (i == V2_DQBLKSIZE) {
1049 -                       put_free_dqblk(filp, dquot->dq_type, buf, *blk);
1050 +                       put_free_dqblk(sb, type, buf, *blk);
1051                         *blk = 0;
1052                 }
1053                 else
1054 -                       if ((ret = write_blk(filp, *blk, buf)) < 0)
1055 -                               printk(KERN_ERR "VFS: Can't write quota tree block %u.\n", *blk);
1056 +                       if ((ret = write_blk(sb, type, *blk, buf)) < 0)
1057 +                               printk(KERN_ERR "VFS: Can't write quota tree "
1058 +                                 "block %u.\n", *blk);
1059         }
1060  out_buf:
1061         freedqbuf(buf);
1062 @@ -555,7 +530,6 @@ static int v2_delete_dquot(struct dquot 
1063  /* Find entry in block */
1064  static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
1065  {
1066 -       struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
1067         dqbuf_t buf = getdqbuf();
1068         loff_t ret = 0;
1069         int i;
1070 @@ -563,27 +537,31 @@ static loff_t find_block_dqentry(struct 
1071  
1072         if (!buf)
1073                 return -ENOMEM;
1074 -       if ((ret = read_blk(filp, blk, buf)) < 0) {
1075 +       if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
1076                 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
1077                 goto out_buf;
1078         }
1079         if (dquot->dq_id)
1080 -               for (i = 0; i < V2_DQSTRINBLK && le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
1081 +               for (i = 0; i < V2_DQSTRINBLK &&
1082 +                    le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
1083         else {  /* ID 0 as a bit more complicated searching... */
1084                 struct v2_disk_dqblk fakedquot;
1085  
1086                 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
1087                 for (i = 0; i < V2_DQSTRINBLK; i++)
1088 -                       if (!le32_to_cpu(ddquot[i].dqb_id) && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
1089 +                       if (!le32_to_cpu(ddquot[i].dqb_id) &&
1090 +                           memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
1091                                 break;
1092         }
1093         if (i == V2_DQSTRINBLK) {
1094 -               printk(KERN_ERR "VFS: Quota for id %u referenced but not present.\n", dquot->dq_id);
1095 +               printk(KERN_ERR "VFS: Quota for id %u referenced "
1096 +                 "but not present.\n", dquot->dq_id);
1097                 ret = -EIO;
1098                 goto out_buf;
1099         }
1100         else
1101 -               ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
1102 +               ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
1103 +                 v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
1104  out_buf:
1105         freedqbuf(buf);
1106         return ret;
1107 @@ -592,14 +570,13 @@ out_buf:
1108  /* Find entry for given id in the tree */
1109  static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
1110  {
1111 -       struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
1112         dqbuf_t buf = getdqbuf();
1113         loff_t ret = 0;
1114         __le32 *ref = (__le32 *)buf;
1115  
1116         if (!buf)
1117                 return -ENOMEM;
1118 -       if ((ret = read_blk(filp, blk, buf)) < 0) {
1119 +       if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
1120                 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
1121                 goto out_buf;
1122         }
1123 @@ -625,16 +602,13 @@ static inline loff_t find_dqentry(struct
1124  static int v2_read_dquot(struct dquot *dquot)
1125  {
1126         int type = dquot->dq_type;
1127 -       struct file *filp;
1128 -       mm_segment_t fs;
1129         loff_t offset;
1130         struct v2_disk_dqblk ddquot, empty;
1131         int ret = 0;
1132  
1133 -       filp = sb_dqopt(dquot->dq_sb)->files[type];
1134 -
1135  #ifdef __QUOTA_V2_PARANOIA
1136 -       if (!filp || !dquot->dq_sb) {   /* Invalidated quota? */
1137 +       /* Invalidated quota? */
1138 +       if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
1139                 printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
1140                 return -EIO;
1141         }
1142 @@ -642,7 +616,8 @@ static int v2_read_dquot(struct dquot *d
1143         offset = find_dqentry(dquot);
1144         if (offset <= 0) {      /* Entry not present? */
1145                 if (offset < 0)
1146 -                       printk(KERN_ERR "VFS: Can't read quota structure for id %u.\n", dquot->dq_id);
1147 +                       printk(KERN_ERR "VFS: Can't read quota "
1148 +                         "structure for id %u.\n", dquot->dq_id);
1149                 dquot->dq_off = 0;
1150                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
1151                 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
1152 @@ -650,12 +625,13 @@ static int v2_read_dquot(struct dquot *d
1153         }
1154         else {
1155                 dquot->dq_off = offset;
1156 -               fs = get_fs();
1157 -               set_fs(KERNEL_DS);
1158 -               if ((ret = filp->f_op->read(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset)) != sizeof(struct v2_disk_dqblk)) {
1159 +               if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
1160 +                   (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
1161 +                   != sizeof(struct v2_disk_dqblk)) {
1162                         if (ret >= 0)
1163                                 ret = -EIO;
1164 -                       printk(KERN_ERR "VFS: Error while reading quota structure for id %u.\n", dquot->dq_id);
1165 +                       printk(KERN_ERR "VFS: Error while reading quota "
1166 +                         "structure for id %u.\n", dquot->dq_id);
1167                         memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
1168                 }
1169                 else {
1170 @@ -666,7 +642,6 @@ static int v2_read_dquot(struct dquot *d
1171                         if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
1172                                 ddquot.dqb_itime = 0;
1173                 }
1174 -               set_fs(fs);
1175                 disk2memdqb(&dquot->dq_dqb, &ddquot);
1176                 if (!dquot->dq_dqb.dqb_bhardlimit &&
1177                         !dquot->dq_dqb.dqb_bsoftlimit &&
1178 diff -puN include/linux/fs.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/fs.h
1179 --- 25/include/linux/fs.h~fix-of-quota-deadlock-on-pagelock-quota-core  2004-12-03 20:56:04.300106472 -0800
1180 +++ 25-akpm/include/linux/fs.h  2004-12-03 20:56:04.319103584 -0800
1181 @@ -1004,6 +1004,9 @@ struct super_operations {
1182         void (*umount_begin) (struct super_block *);
1183
1184         int (*show_options)(struct seq_file *, struct vfsmount *);
1185 +
1186 +       ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
1187 +       ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1188  };
1189
1190  /* Inode state bits.  Protected by inode_lock. */
1191
1192 diff -puN include/linux/quota.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/quota.h
1193 --- 25/include/linux/quota.h~fix-of-quota-deadlock-on-pagelock-quota-core       2004-12-03 20:56:04.301106320 -0800
1194 +++ 25-akpm/include/linux/quota.h       2004-12-03 20:56:04.320103432 -0800
1195 @@ -285,7 +285,7 @@ struct quota_info {
1196         struct semaphore dqio_sem;              /* lock device while I/O in progress */
1197         struct semaphore dqonoff_sem;           /* Serialize quotaon & quotaoff */
1198         struct rw_semaphore dqptr_sem;          /* serialize ops using quota_info struct, pointers from inode to dquots */
1199 -       struct file *files[MAXQUOTAS];          /* fp's to quotafiles */
1200 +       struct inode *files[MAXQUOTAS];         /* inodes of quotafiles */
1201         struct mem_dqinfo info[MAXQUOTAS];      /* Information for each quota type */
1202         struct quota_format_ops *ops[MAXQUOTAS];        /* Operations for each type */
1203  };
1204 diff -puN include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/security.h
1205 --- 25/include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core    2004-12-03 20:56:04.303106016 -0800
1206 +++ 25-akpm/include/linux/security.h    2004-12-03 20:56:04.322103128 -0800
1207 @@ -1033,7 +1033,7 @@ struct security_operations {
1208         int (*sysctl) (ctl_table * table, int op);
1209         int (*capable) (struct task_struct * tsk, int cap);
1210         int (*quotactl) (int cmds, int type, int id, struct super_block * sb);
1211 -       int (*quota_on) (struct file * f);
1212 +       int (*quota_on) (struct dentry * dentry);
1213         int (*syslog) (int type);
1214         int (*vm_enough_memory) (long pages);
1215
1216 @@ -1281,9 +1281,9 @@ static inline int security_quotactl (int
1217         return security_ops->quotactl (cmds, type, id, sb);
1218  }
1219  
1220 -static inline int security_quota_on (struct file * file)
1221 +static inline int security_quota_on (struct dentry * dentry)
1222  {
1223 -       return security_ops->quota_on (file);
1224 +       return security_ops->quota_on (dentry);
1225  }
1226  
1227  static inline int security_syslog(int type)
1228 @@ -1959,7 +1959,7 @@ static inline int security_quotactl (int
1229         return 0;
1230  }
1231  
1232 -static inline int security_quota_on (struct file * file)
1233 +static inline int security_quota_on (struct dentry * dentry)
1234  {
1235         return 0;
1236  }
1237 diff -puN security/dummy.c~fix-of-quota-deadlock-on-pagelock-quota-core security/dummy.c
1238 --- 25/security/dummy.c~fix-of-quota-deadlock-on-pagelock-quota-core    2004-12-03 20:56:04.304105864 -0800
1239 +++ 25-akpm/security/dummy.c    2004-12-03 20:56:04.323102976 -0800
1240 @@ -92,7 +92,7 @@ static int dummy_quotactl (int cmds, int
1241         return 0;
1242  }
1243  
1244 -static int dummy_quota_on (struct file *f)
1245 +static int dummy_quota_on (struct dentry *dentry)
1246  {
1247         return 0;
1248  }
1249 diff -puN security/selinux/hooks.c~fix-of-quota-deadlock-on-pagelock-quota-core security/selinux/hooks.c
1250 --- 25/security/selinux/hooks.c~fix-of-quota-deadlock-on-pagelock-quota-core    2004-12-03 20:56:04.306105560 -0800
1251 +++ 25-akpm/security/selinux/hooks.c    2004-12-03 20:56:04.326102520 -0800
1252 @@ -1494,9 +1494,9 @@ static int selinux_quotactl(int cmds, in
1253         return rc;
1254  }
1255  
1256 -static int selinux_quota_on(struct file *f)
1257 +static int selinux_quota_on(struct dentry *dentry)
1258  {
1259 -       return file_has_perm(current, f, FILE__QUOTAON);
1260 +       return dentry_has_perm(current, NULL, dentry, FILE__QUOTAON);
1261  }
1262  
1263  static int selinux_syslog(int type)
1264 _