Whamcloud - gitweb
land b1_5 onto HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / quota-deadlock-on-pagelock-core.patch
diff --git a/lustre/kernel_patches/patches/quota-deadlock-on-pagelock-core.patch b/lustre/kernel_patches/patches/quota-deadlock-on-pagelock-core.patch
new file mode 100644 (file)
index 0000000..892a61f
--- /dev/null
@@ -0,0 +1,1264 @@
+
+From: Jan Kara <jack@suse.cz>
+
+The four patches in this series fix quota deadlocks on PageLock (the
+problem was lock inversion between PageLock and transaction start - quota code
+needed to first start a transaction and then write the data, which subsequently
+needed to acquire PageLock, while the standard ordering - PageLock first
+and transaction start later - was used e.g.  by pdflush).  They implement a
+new way of quota access to disk: Every filesystem that would like to implement
+quotas now has to provide quota_read() and quota_write() functions.  These
+functions must obey quota lock ordering (in particular they should not take
+PageLock inside a transaction).
+
+The first patch implements the changes in the quota core, the other three
+patches implement needed functions in ext2, ext3 and reiserfs.  The patch for
+reiserfs also fixes several other lock inversion problems (similar to those
+ext3 had) and implements the journaled quota functionality (which comes almost
+for free after the locking fixes...).
+
+The quota core patch makes quota support in other filesystems (except XFS
+which implements everything on its own ;)) non-functional (quotaon() will refuse
+to turn on quotas on them).  Once the patches get reasonably wide testing and
+it seems that no major changes will be needed, I can make fixes also for
+the other filesystems (JFS, UDF, UFS).
+
+This patch:
+
+The patch implements the new way of quota io in the quota core.  Every
+filesystem wanting to support quotas has to provide functions quota_read()
+and quota_write() obeying quota locking rules.  As the writes and reads
+bypass the pagecache there is some ugly stuff ensuring that userspace can
+see all the data after quotaoff() (or Q_SYNC quotactl).  In the future I plan
+to make quota files inaccessible from userspace (with the exception of
+quotacheck(8) which will take care of the cache flushing and such stuff
+itself) so that this synchronization stuff can be removed...
+
+The rewrite of the quota core. Quota no longer uses the filesystem read() and
+write() functions, to avoid possible deadlocks on PageLock. From now on every
+filesystem supporting quotas must provide functions quota_read() and
+quota_write() which obey the quota locking rules (e.g. they cannot acquire the
+PageLock).
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+---
+
+ 25-akpm/fs/dquot.c               |  162 +++++++++++++--------------
+ 25-akpm/fs/quota.c               |   45 +++++++
+ 25-akpm/fs/quota_v1.c            |   62 ++--------
+ 25-akpm/fs/quota_v2.c            |  227 +++++++++++++++++----------------------
+ 25-akpm/include/linux/fs.h       |    3 
+ 25-akpm/include/linux/quota.h    |    2 
+ 25-akpm/include/linux/security.h |    8 -
+ 25-akpm/security/dummy.c         |    2 
+ 25-akpm/security/selinux/hooks.c |    4 
+ 9 files changed, 247 insertions(+), 268 deletions(-)
+
+diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c
+--- 25/fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.293107536 -0800
++++ 25-akpm/fs/dquot.c 2004-12-03 20:56:04.312104648 -0800
+@@ -49,7 +49,7 @@
+  *            New SMP locking.
+  *            Jan Kara, <jack@suse.cz>, 10/2002
+  *
+- *            Added journalled quota support
++ *            Added journalled quota support, fix lock inversion problems
+  *            Jan Kara, <jack@suse.cz>, 2003,2004
+  *
+  * (C) Copyright 1994 - 1997 Marco van Wieringen 
+@@ -75,7 +75,8 @@
+ #include <linux/proc_fs.h>
+ #include <linux/security.h>
+ #include <linux/kmod.h>
+-#include <linux/pagemap.h>
++#include <linux/namei.h>
++#include <linux/buffer_head.h>
+ #include <asm/uaccess.h>
+@@ -114,7 +115,7 @@
+  * operations on dquots don't hold dq_lock as they copy data under dq_data_lock
+  * spinlock to internal buffers before writing.
+  *
+- * Lock ordering (including related VFS locks) is following:
++ * Lock ordering (including related VFS locks) is the following:
+  *   i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
+  *   > dquot->dq_lock > dqio_sem
+  * i_sem on quota files is special (it's below dqio_sem)
+@@ -183,8 +184,7 @@ static void put_quota_format(struct quot
+  * on all three lists, depending on its current state.
+  *
+  * All dquots are placed to the end of inuse_list when first created, and this
+- * list is used for the sync and invalidate operations, which must look
+- * at every dquot.
++ * list is used for invalidate operation, which must look at every dquot.
+  *
+  * Unused dquots (dq_count == 0) are added to the free_dquots list when freed,
+  * and this list is searched whenever we need an available dquot.  Dquots are
+@@ -1314,10 +1314,12 @@ int vfs_quota_off(struct super_block *sb
+ {
+       int cnt;
+       struct quota_info *dqopt = sb_dqopt(sb);
++      struct inode *toput[MAXQUOTAS];
+       /* We need to serialize quota_off() for device */
+       down(&dqopt->dqonoff_sem);
+       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++              toput[cnt] = NULL;
+               if (type != -1 && cnt != type)
+                       continue;
+               if (!sb_has_quota_enabled(sb, cnt))
+@@ -1337,7 +1339,7 @@ int vfs_quota_off(struct super_block *sb
+                       dqopt->ops[cnt]->free_file_info(sb, cnt);
+               put_quota_format(dqopt->info[cnt].dqi_format);
+-              fput(dqopt->files[cnt]);
++              toput[cnt] = dqopt->files[cnt];
+               dqopt->files[cnt] = NULL;
+               dqopt->info[cnt].dqi_flags = 0;
+               dqopt->info[cnt].dqi_igrace = 0;
+@@ -1345,6 +1347,26 @@ int vfs_quota_off(struct super_block *sb
+               dqopt->ops[cnt] = NULL;
+       }
+       up(&dqopt->dqonoff_sem);
++      /* Sync the superblock so that buffers with quota data are written to
++         * disk (and so userspace sees correct data afterwards) */
++      if (sb->s_op->sync_fs)
++              sb->s_op->sync_fs(sb, 1);
++      sync_blockdev(sb->s_bdev);
++      /* Now the quota files are just ordinary files and we can set the
++       * inode flags back. Moreover we discard the pagecache so that
++       * userspace sees the writes we did bypassing the pagecache. We
++       * must also discard the blockdev buffers so that we see the
++       * changes done by userspace on the next quotaon() */
++      for (cnt = 0; cnt < MAXQUOTAS; cnt++)
++              if (toput[cnt]) {
++                      down(&toput[cnt]->i_sem);
++                      toput[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA);
++                      truncate_inode_pages(&toput[cnt]->i_data, 0);
++                      up(&toput[cnt]->i_sem);
++                      mark_inode_dirty(toput[cnt]);
++                      iput(toput[cnt]);
++              }
++      invalidate_bdev(sb->s_bdev, 0);
+       return 0;
+ }
+@@ -1352,68 +1374,56 @@ int vfs_quota_off(struct super_block *sb
+  *    Turn quotas on on a device
+  */
+-/* Helper function when we already have file open */
+-static int vfs_quota_on_file(struct file *f, int type, int format_id)
++/* Helper function when we already have the inode */
++static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
+ {
+       struct quota_format_type *fmt = find_quota_format(format_id);
+-      struct inode *inode;
+-      struct super_block *sb = f->f_dentry->d_sb;
++      struct super_block *sb = inode->i_sb;
+       struct quota_info *dqopt = sb_dqopt(sb);
+-      struct dquot *to_drop[MAXQUOTAS];
+-      int error, cnt;
+-      unsigned int oldflags = -1;
++      int error;
++      int oldflags = -1;
+       if (!fmt)
+               return -ESRCH;
+-      error = -EIO;
+-      if (!f->f_op || !f->f_op->read || !f->f_op->write)
++      if (!S_ISREG(inode->i_mode)) {
++              error = -EACCES;
+               goto out_fmt;
+-      inode = f->f_dentry->d_inode;
+-      error = -EACCES;
+-      if (!S_ISREG(inode->i_mode))
++      }
++      if (IS_RDONLY(inode)) {
++              error = -EROFS;
++              goto out_fmt;
++      }
++      if (!sb->s_op->quota_write || !sb->s_op->quota_read) {
++              error = -EINVAL;
+               goto out_fmt;
++      }
++      /* As we bypass the pagecache we must now flush the inode so that
++       * we see all the changes from userspace... */
++      write_inode_now(inode, 1);
++      /* And now flush the block cache so that kernel sees the changes */
++      invalidate_bdev(sb->s_bdev, 0);
+       down(&inode->i_sem);
+       down(&dqopt->dqonoff_sem);
+       if (sb_has_quota_enabled(sb, type)) {
+-              up(&inode->i_sem);
+               error = -EBUSY;
+               goto out_lock;
+       }
+       /* We don't want quota and atime on quota files (deadlocks possible)
+-       * We also need to set GFP mask differently because we cannot recurse
+-       * into filesystem when allocating page for quota inode */
++       * Also nobody should write to the file - we use special IO operations
++       * which ignore the immutable bit. */
+       down_write(&dqopt->dqptr_sem);
+-      oldflags = inode->i_flags & (S_NOATIME | S_NOQUOTA);
+-      inode->i_flags |= S_NOQUOTA | S_NOATIME;
++      oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
++      inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
+       up_write(&dqopt->dqptr_sem);
+-      up(&inode->i_sem);
+-      dqopt->files[type] = f;
++      error = -EIO;
++      dqopt->files[type] = igrab(inode);
++      if (!dqopt->files[type])
++              goto out_lock;
+       error = -EINVAL;
+       if (!fmt->qf_ops->check_quota_file(sb, type))
+               goto out_file_init;
+-      /*
+-       * We write to quota files deep within filesystem code.  We don't want
+-       * the VFS to reenter filesystem code when it tries to allocate a
+-       * pagecache page for the quota file write.  So clear __GFP_FS in
+-       * the quota file's allocation flags.
+-       */
+-      mapping_set_gfp_mask(inode->i_mapping,
+-              mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+-
+-      down_write(&dqopt->dqptr_sem);
+-      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+-              to_drop[cnt] = inode->i_dquot[cnt];
+-              inode->i_dquot[cnt] = NODQUOT;
+-      }
+-      up_write(&dqopt->dqptr_sem);
+-      /* We must put dquots outside of dqptr_sem because we may need to
+-       * start transaction for dquot_release() */
+-      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+-              if (to_drop[cnt])
+-                      dqput(to_drop[cnt]);
+-      }
+       dqopt->ops[type] = fmt->qf_ops;
+       dqopt->info[type].dqi_format = fmt;
+@@ -1424,6 +1434,7 @@ static int vfs_quota_on_file(struct file
+               goto out_file_init;
+       }
+       up(&dqopt->dqio_sem);
++      up(&inode->i_sem);
+       set_enable_flags(dqopt, type);
+       add_dquot_ref(sb, type);
+@@ -1433,19 +1444,18 @@ static int vfs_quota_on_file(struct file
+ out_file_init:
+       dqopt->files[type] = NULL;
++      iput(inode);
+ out_lock:
+       up(&dqopt->dqonoff_sem);
+       if (oldflags != -1) {
+-              down(&inode->i_sem);
+               down_write(&dqopt->dqptr_sem);
+-              /* Reset the NOATIME flag back. I know it could change in the
+-               * mean time but playing with NOATIME flags on a quota file is
+-               * never a good idea */
+-              inode->i_flags &= ~(S_NOATIME | S_NOQUOTA);
++              /* Set the flags back (in the case of accidental quotaon()
++               * on a wrong file we don't want to mess up the flags) */
++              inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
+               inode->i_flags |= oldflags;
+               up_write(&dqopt->dqptr_sem);
+-              up(&inode->i_sem);
+       }
++      up(&inode->i_sem);
+ out_fmt:
+       put_quota_format(fmt);
+@@ -1455,47 +1465,37 @@ out_fmt:
+ /* Actual function called from quotactl() */
+ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
+ {
+-      struct file *f;
++      struct nameidata nd;
+       int error;
+-      f = filp_open(path, O_RDWR, 0600);
+-      if (IS_ERR(f))
+-              return PTR_ERR(f);
+-      error = security_quota_on(f);
++      error = path_lookup(path, LOOKUP_FOLLOW, &nd);
++      if (error < 0)
++              return error;
++      error = security_quota_on(nd.dentry);
+       if (error)
+-              goto out_f;
+-      error = vfs_quota_on_file(f, type, format_id);
+-      if (!error)
+-              return 0;
+-out_f:
+-      filp_close(f, NULL);
++              goto out_path;
++      /* Quota file not on the same filesystem? */
++      if (nd.mnt->mnt_sb != sb)
++              error = -EXDEV;
++      else
++              error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
++out_path:
++      path_release(&nd);
+       return error;
+ }
+ /*
+- * Function used by filesystems when filp_open() would fail (filesystem is
+- * being mounted now). We will use a private file structure. Caller is
+- * responsible that it's IO functions won't need vfsmnt structure or
+- * some dentry tricks...
++ * This function is used when filesystem needs to initialize quotas
++ * during mount time.
+  */
+ int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry)
+ {
+-      struct file *f;
+       int error;
+-      dget(dentry);   /* Get a reference for struct file */
+-      f = dentry_open(dentry, NULL, O_RDWR);
+-      if (IS_ERR(f)) {
+-              error = PTR_ERR(f);
+-              goto out_dentry;
+-      }
+-      error = vfs_quota_on_file(f, type, format_id);
+-      if (!error)
+-              return 0;
+-      fput(f);
+-out_dentry:
+-      dput(dentry);
+-      return error;
++      error = security_quota_on(dentry);
++      if (error)
++              return error;
++      return vfs_quota_on_inode(dentry->d_inode, type, format_id);
+ }
+ /* Generic routine for getting common part of quota structure */
+diff -puN fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota.c
+--- 25/fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.295107232 -0800
++++ 25-akpm/fs/quota.c 2004-12-03 20:56:04.313104496 -0800
+@@ -13,6 +13,8 @@
+ #include <linux/kernel.h>
+ #include <linux/smp_lock.h>
+ #include <linux/security.h>
++#include <linux/syscalls.h>
++#include <linux/buffer_head.h>
+
+ /* Check validity of quotactl */
+ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
+@@ -135,16 +136,54 @@ restart:
+       return NULL;
+ }
++void quota_sync_sb(struct super_block *sb, int type)
++{
++      int cnt;
++      struct inode *discard[MAXQUOTAS];
++
++      sb->s_qcop->quota_sync(sb, type);
++      /* This is not very clever (and fast) but currently I don't know about
++       * any other simple way of getting quota data to disk and we must get
++       * them there for userspace to be visible... */
++      if (sb->s_op->sync_fs)
++              sb->s_op->sync_fs(sb, 1);
++      sync_blockdev(sb->s_bdev);
++
++      /* Now when everything is written we can discard the pagecache so
++       * that userspace sees the changes. We need i_sem and so we could
++       * not do it inside dqonoff_sem. Moreover we need to be carefull
++       * about races with quotaoff() (that is the reason why we have own
++       * reference to inode). */
++      down(&sb_dqopt(sb)->dqonoff_sem);
++      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++              discard[cnt] = NULL;
++              if (type != -1 && cnt != type)
++                      continue;
++              if (!sb_has_quota_enabled(sb, cnt))
++                      continue;
++              discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]);
++      }
++      up(&sb_dqopt(sb)->dqonoff_sem);
++      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++              if (discard[cnt]) {
++                      down(&discard[cnt]->i_sem);
++                      truncate_inode_pages(&discard[cnt]->i_data, 0);
++                      up(&discard[cnt]->i_sem);
++                      iput(discard[cnt]);
++              }
++      }
++}
++
+ void sync_dquots(struct super_block *sb, int type)
+ {
+       if (sb) {
+               if (sb->s_qcop->quota_sync)
+-                      sb->s_qcop->quota_sync(sb, type);
++                      quota_sync_sb(sb, type);
+       }
+       else {
+-              while ((sb = get_super_to_sync(type)) != 0) {
++              while ((sb = get_super_to_sync(type)) != NULL) {
+                       if (sb->s_qcop->quota_sync)
+-                              sb->s_qcop->quota_sync(sb, type);
++                              quota_sync_sb(sb, type);
+                       drop_super(sb);
+               }
+       }
+diff -puN fs/quota_v1.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v1.c
+--- 25/fs/quota_v1.c~fix-of-quota-deadlock-on-pagelock-quota-core      2004-12-03 20:56:04.296107080 -0800
++++ 25-akpm/fs/quota_v1.c      2004-12-03 20:56:04.314104344 -0800
+@@ -7,7 +7,6 @@
+ #include <linux/init.h>
+ #include <linux/module.h>
+-#include <asm/uaccess.h>
+ #include <asm/byteorder.h>
+ MODULE_AUTHOR("Jan Kara");
+@@ -41,23 +40,14 @@ static void v1_mem2disk_dqblk(struct v1_
+ static int v1_read_dqblk(struct dquot *dquot)
+ {
+       int type = dquot->dq_type;
+-      struct file *filp;
+-      mm_segment_t fs;
+-      loff_t offset;
+       struct v1_disk_dqblk dqblk;
+-      filp = sb_dqopt(dquot->dq_sb)->files[type];
+-      if (filp == (struct file *)NULL)
++      if (!sb_dqopt(dquot->dq_sb)->files[type])
+               return -EINVAL;
+-      /* Now we are sure filp is valid */
+-      offset = v1_dqoff(dquot->dq_id);
+       /* Set structure to 0s in case read fails/is after end of file */
+       memset(&dqblk, 0, sizeof(struct v1_disk_dqblk));
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset);
+-      set_fs(fs);
++      dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
+       v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk);
+       if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 &&
+@@ -71,26 +61,18 @@ static int v1_read_dqblk(struct dquot *d
+ static int v1_commit_dqblk(struct dquot *dquot)
+ {
+       short type = dquot->dq_type;
+-      struct file *filp;
+-      mm_segment_t fs;
+-      loff_t offset;
+       ssize_t ret;
+       struct v1_disk_dqblk dqblk;
+-      filp = sb_dqopt(dquot->dq_sb)->files[type];
+-      offset = v1_dqoff(dquot->dq_id);
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-
+       v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
+       if (dquot->dq_id == 0) {
+               dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
+               dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
+       }
+       ret = 0;
+-      if (filp)
+-              ret = filp->f_op->write(filp, (char *)&dqblk,
+-                                      sizeof(struct v1_disk_dqblk), &offset);
++      if (sb_dqopt(dquot->dq_sb)->files[type])
++              ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, (char *)&dqblk,
++                                      sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
+       if (ret != sizeof(struct v1_disk_dqblk)) {
+               printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+                       dquot->dq_sb->s_id);
+@@ -101,7 +83,6 @@ static int v1_commit_dqblk(struct dquot 
+       ret = 0;
+ out:
+-      set_fs(fs);
+       dqstats.writes++;
+       return ret;
+@@ -121,14 +102,11 @@ struct v2_disk_dqheader {
+ static int v1_check_quota_file(struct super_block *sb, int type)
+ {
+-      struct file *f = sb_dqopt(sb)->files[type];
+-      struct inode *inode = f->f_dentry->d_inode;
++      struct inode *inode = sb_dqopt(sb)->files[type];
+       ulong blocks;
+       size_t off; 
+       struct v2_disk_dqheader dqhead;
+-      mm_segment_t fs;
+       ssize_t size;
+-      loff_t offset = 0;
+       loff_t isize;
+       static const uint quota_magics[] = V2_INITQMAGICS;
+@@ -140,10 +118,7 @@ static int v1_check_quota_file(struct su
+       if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk))
+               return 0;
+       /* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      size = f->f_op->read(f, (char *)&dqhead, sizeof(struct v2_disk_dqheader), &offset);
+-      set_fs(fs);
++      size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
+       if (size != sizeof(struct v2_disk_dqheader))
+               return 1;       /* Probably not new format */
+       if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type])
+@@ -155,16 +130,10 @@ static int v1_check_quota_file(struct su
+ static int v1_read_file_info(struct super_block *sb, int type)
+ {
+       struct quota_info *dqopt = sb_dqopt(sb);
+-      mm_segment_t fs;
+-      loff_t offset;
+-      struct file *filp = dqopt->files[type];
+       struct v1_disk_dqblk dqblk;
+       int ret;
+-      offset = v1_dqoff(0);
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      if ((ret = filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset)) != sizeof(struct v1_disk_dqblk)) {
++      if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
+               if (ret >= 0)
+                       ret = -EIO;
+               goto out;
+@@ -173,38 +142,31 @@ static int v1_read_file_info(struct supe
+       dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
+       dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
+ out:
+-      set_fs(fs);
+       return ret;
+ }
+ static int v1_write_file_info(struct super_block *sb, int type)
+ {
+       struct quota_info *dqopt = sb_dqopt(sb);
+-      mm_segment_t fs;
+-      struct file *filp = dqopt->files[type];
+       struct v1_disk_dqblk dqblk;
+-      loff_t offset;
+       int ret;
+       dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY;
+-      offset = v1_dqoff(0);
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      if ((ret = filp->f_op->read(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset)) != sizeof(struct v1_disk_dqblk)) {
++      if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk,
++          sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
+               if (ret >= 0)
+                       ret = -EIO;
+               goto out;
+       }
+       dqblk.dqb_itime = dqopt->info[type].dqi_igrace;
+       dqblk.dqb_btime = dqopt->info[type].dqi_bgrace;
+-      offset = v1_dqoff(0);
+-      ret = filp->f_op->write(filp, (char *)&dqblk, sizeof(struct v1_disk_dqblk), &offset);
++      ret = sb->s_op->quota_write(sb, type, (char *)&dqblk,
++            sizeof(struct v1_disk_dqblk), v1_dqoff(0));
+       if (ret == sizeof(struct v1_disk_dqblk))
+               ret = 0;
+       else if (ret > 0)
+               ret = -EIO;
+ out:
+-      set_fs(fs);
+       return ret;
+ }
+diff -puN fs/quota_v2.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v2.c
+--- 25/fs/quota_v2.c~fix-of-quota-deadlock-on-pagelock-quota-core      2004-12-03 20:56:04.297106928 -0800
++++ 25-akpm/fs/quota_v2.c      2004-12-03 20:56:04.318103736 -0800
+@@ -13,7 +13,6 @@
+ #include <linux/slab.h>
+ #include <asm/byteorder.h>
+-#include <asm/uaccess.h>
+ MODULE_AUTHOR("Jan Kara");
+ MODULE_DESCRIPTION("Quota format v2 support");
+@@ -30,19 +29,15 @@ typedef char *dqbuf_t;
+ static int v2_check_quota_file(struct super_block *sb, int type)
+ {
+       struct v2_disk_dqheader dqhead;
+-      struct file *f = sb_dqopt(sb)->files[type];
+-      mm_segment_t fs;
+       ssize_t size;
+-      loff_t offset = 0;
+       static const uint quota_magics[] = V2_INITQMAGICS;
+       static const uint quota_versions[] = V2_INITQVERSIONS;
+  
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      size = f->f_op->read(f, (char *)&dqhead, sizeof(struct v2_disk_dqheader), &offset);
+-      set_fs(fs);
+-      if (size != sizeof(struct v2_disk_dqheader))
++      size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
++      if (size != sizeof(struct v2_disk_dqheader)) {
++              printk("failed read\n");
+               return 0;
++      }
+       if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
+           le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
+               return 0;
+@@ -52,20 +47,15 @@ static int v2_check_quota_file(struct su
+ /* Read information header from quota file */
+ static int v2_read_file_info(struct super_block *sb, int type)
+ {
+-      mm_segment_t fs;
+       struct v2_disk_dqinfo dinfo;
+       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+-      struct file *f = sb_dqopt(sb)->files[type];
+       ssize_t size;
+-      loff_t offset = V2_DQINFOOFF;
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      size = f->f_op->read(f, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), &offset);
+-      set_fs(fs);
++      size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
++             sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
+       if (size != sizeof(struct v2_disk_dqinfo)) {
+               printk(KERN_WARNING "Can't read info structure on device %s.\n",
+-                      f->f_dentry->d_sb->s_id);
++                      sb->s_id);
+               return -1;
+       }
+       info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
+@@ -80,12 +70,9 @@ static int v2_read_file_info(struct supe
+ /* Write information header to quota file */
+ static int v2_write_file_info(struct super_block *sb, int type)
+ {
+-      mm_segment_t fs;
+       struct v2_disk_dqinfo dinfo;
+       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+-      struct file *f = sb_dqopt(sb)->files[type];
+       ssize_t size;
+-      loff_t offset = V2_DQINFOOFF;
+       spin_lock(&dq_data_lock);
+       info->dqi_flags &= ~DQF_INFO_DIRTY;
+@@ -96,13 +83,11 @@ static int v2_write_file_info(struct sup
+       dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
+       dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
+       dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      size = f->f_op->write(f, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), &offset);
+-      set_fs(fs);
++      size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
++             sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
+       if (size != sizeof(struct v2_disk_dqinfo)) {
+               printk(KERN_WARNING "Can't write info structure on device %s.\n",
+-                      f->f_dentry->d_sb->s_id);
++                      sb->s_id);
+               return -1;
+       }
+       return 0;
+@@ -146,39 +131,24 @@ static inline void freedqbuf(dqbuf_t buf
+       kfree(buf);
+ }
+-static ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
++static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
+ {
+-      mm_segment_t fs;
+-      ssize_t ret;
+-      loff_t offset = blk<<V2_DQBLKSIZE_BITS;
+-
+       memset(buf, 0, V2_DQBLKSIZE);
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      ret = filp->f_op->read(filp, (char *)buf, V2_DQBLKSIZE, &offset);
+-      set_fs(fs);
+-      return ret;
++      return sb->s_op->quota_read(sb, type, (char *)buf,
++             V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
+ }
+-static ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
++static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
+ {
+-      mm_segment_t fs;
+-      ssize_t ret;
+-      loff_t offset = blk<<V2_DQBLKSIZE_BITS;
+-
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      ret = filp->f_op->write(filp, (char *)buf, V2_DQBLKSIZE, &offset);
+-      set_fs(fs);
+-      return ret;
+-
++      return sb->s_op->quota_write(sb, type, (char *)buf,
++             V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
+ }
+ /* Remove empty block from list and return it */
+-static int get_free_dqblk(struct file *filp, int type)
++static int get_free_dqblk(struct super_block *sb, int type)
+ {
+       dqbuf_t buf = getdqbuf();
+-      struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
++      struct mem_dqinfo *info = sb_dqinfo(sb, type);
+       struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
+       int ret, blk;
+@@ -186,17 +156,18 @@ static int get_free_dqblk(struct file *f
+               return -ENOMEM;
+       if (info->u.v2_i.dqi_free_blk) {
+               blk = info->u.v2_i.dqi_free_blk;
+-              if ((ret = read_blk(filp, blk, buf)) < 0)
++              if ((ret = read_blk(sb, type, blk, buf)) < 0)
+                       goto out_buf;
+               info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
+       }
+       else {
+               memset(buf, 0, V2_DQBLKSIZE);
+-              if ((ret = write_blk(filp, info->u.v2_i.dqi_blocks, buf)) < 0)  /* Assure block allocation... */
++              /* Assure block allocation... */
++              if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
+                       goto out_buf;
+               blk = info->u.v2_i.dqi_blocks++;
+       }
+-      mark_info_dirty(filp->f_dentry->d_sb, type);
++      mark_info_dirty(sb, type);
+       ret = blk;
+ out_buf:
+       freedqbuf(buf);
+@@ -204,9 +175,9 @@ out_buf:
+ }
+ /* Insert empty block to the list */
+-static int put_free_dqblk(struct file *filp, int type, dqbuf_t buf, uint blk)
++static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
+ {
+-      struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
++      struct mem_dqinfo *info = sb_dqinfo(sb, type);
+       struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
+       int err;
+@@ -214,17 +185,18 @@ static int put_free_dqblk(struct file *f
+       dh->dqdh_prev_free = cpu_to_le32(0);
+       dh->dqdh_entries = cpu_to_le16(0);
+       info->u.v2_i.dqi_free_blk = blk;
+-      mark_info_dirty(filp->f_dentry->d_sb, type);
+-      if ((err = write_blk(filp, blk, buf)) < 0)      /* Some strange block. We had better leave it... */
++      mark_info_dirty(sb, type);
++      /* Some strange block. We had better leave it... */
++      if ((err = write_blk(sb, type, blk, buf)) < 0)
+               return err;
+       return 0;
+ }
+ /* Remove given block from the list of blocks with free entries */
+-static int remove_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk)
++static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
+ {
+       dqbuf_t tmpbuf = getdqbuf();
+-      struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
++      struct mem_dqinfo *info = sb_dqinfo(sb, type);
+       struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
+       uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
+       int err;
+@@ -232,26 +204,27 @@ static int remove_free_dqentry(struct fi
+       if (!tmpbuf)
+               return -ENOMEM;
+       if (nextblk) {
+-              if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
++              if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
+                       goto out_buf;
+               ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
+-              if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
++              if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
+                       goto out_buf;
+       }
+       if (prevblk) {
+-              if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
++              if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
+                       goto out_buf;
+               ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
+-              if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
++              if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
+                       goto out_buf;
+       }
+       else {
+               info->u.v2_i.dqi_free_entry = nextblk;
+-              mark_info_dirty(filp->f_dentry->d_sb, type);
++              mark_info_dirty(sb, type);
+       }
+       freedqbuf(tmpbuf);
+       dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
+-      if (write_blk(filp, blk, buf) < 0)      /* No matter whether write succeeds block is out of list */
++      /* No matter whether write succeeds block is out of list */
++      if (write_blk(sb, type, blk, buf) < 0)
+               printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
+       return 0;
+ out_buf:
+@@ -260,10 +233,10 @@ out_buf:
+ }
+ /* Insert given block to the beginning of list with free entries */
+-static int insert_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk)
++static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
+ {
+       dqbuf_t tmpbuf = getdqbuf();
+-      struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type);
++      struct mem_dqinfo *info = sb_dqinfo(sb, type);
+       struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
+       int err;
+@@ -271,18 +244,18 @@ static int insert_free_dqentry(struct fi
+               return -ENOMEM;
+       dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
+       dh->dqdh_prev_free = cpu_to_le32(0);
+-      if ((err = write_blk(filp, blk, buf)) < 0)
++      if ((err = write_blk(sb, type, blk, buf)) < 0)
+               goto out_buf;
+       if (info->u.v2_i.dqi_free_entry) {
+-              if ((err = read_blk(filp, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
++              if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
+                       goto out_buf;
+               ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
+-              if ((err = write_blk(filp, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
++              if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
+                       goto out_buf;
+       }
+       freedqbuf(tmpbuf);
+       info->u.v2_i.dqi_free_entry = blk;
+-      mark_info_dirty(filp->f_dentry->d_sb, type);
++      mark_info_dirty(sb, type);
+       return 0;
+ out_buf:
+       freedqbuf(tmpbuf);
+@@ -292,8 +265,8 @@ out_buf:
+ /* Find space for dquot */
+ static uint find_free_dqentry(struct dquot *dquot, int *err)
+ {
+-      struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+-      struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info+dquot->dq_type;
++      struct super_block *sb = dquot->dq_sb;
++      struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
+       uint blk, i;
+       struct v2_disk_dqdbheader *dh;
+       struct v2_disk_dqblk *ddquot;
+@@ -309,22 +282,23 @@ static uint find_free_dqentry(struct dqu
+       ddquot = GETENTRIES(buf);
+       if (info->u.v2_i.dqi_free_entry) {
+               blk = info->u.v2_i.dqi_free_entry;
+-              if ((*err = read_blk(filp, blk, buf)) < 0)
++              if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
+                       goto out_buf;
+       }
+       else {
+-              blk = get_free_dqblk(filp, dquot->dq_type);
++              blk = get_free_dqblk(sb, dquot->dq_type);
+               if ((int)blk < 0) {
+                       *err = blk;
+                       freedqbuf(buf);
+                       return 0;
+               }
+               memset(buf, 0, V2_DQBLKSIZE);
+-              info->u.v2_i.dqi_free_entry = blk;      /* This is enough as block is already zeroed and entry list is empty... */
+-              mark_info_dirty(dquot->dq_sb, dquot->dq_type);
++              /* This is enough as block is already zeroed and entry list is empty... */
++              info->u.v2_i.dqi_free_entry = blk;
++              mark_info_dirty(sb, dquot->dq_type);
+       }
+       if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)   /* Block will be full? */
+-              if ((*err = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) {
++              if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
+                       printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
+                       goto out_buf;
+               }
+@@ -339,7 +313,7 @@ static uint find_free_dqentry(struct dqu
+               goto out_buf;
+       }
+ #endif
+-      if ((*err = write_blk(filp, blk, buf)) < 0) {
++      if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
+               printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
+               goto out_buf;
+       }
+@@ -354,7 +328,7 @@ out_buf:
+ /* Insert reference to structure into the trie */
+ static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
+ {
+-      struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
++      struct super_block *sb = dquot->dq_sb;
+       dqbuf_t buf;
+       int ret = 0, newson = 0, newact = 0;
+       __le32 *ref;
+@@ -363,7 +337,7 @@ static int do_insert_tree(struct dquot *
+       if (!(buf = getdqbuf()))
+               return -ENOMEM;
+       if (!*treeblk) {
+-              ret = get_free_dqblk(filp, dquot->dq_type);
++              ret = get_free_dqblk(sb, dquot->dq_type);
+               if (ret < 0)
+                       goto out_buf;
+               *treeblk = ret;
+@@ -371,7 +345,7 @@ static int do_insert_tree(struct dquot *
+               newact = 1;
+       }
+       else {
+-              if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
++              if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
+                       printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
+                       goto out_buf;
+               }
+@@ -394,10 +368,10 @@ static int do_insert_tree(struct dquot *
+               ret = do_insert_tree(dquot, &newblk, depth+1);
+       if (newson && ret >= 0) {
+               ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
+-              ret = write_blk(filp, *treeblk, buf);
++              ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
+       }
+       else if (newact && ret < 0)
+-              put_free_dqblk(filp, dquot->dq_type, buf, *treeblk);
++              put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
+ out_buf:
+       freedqbuf(buf);
+       return ret;
+@@ -416,20 +390,15 @@ static inline int dq_insert_tree(struct 
+ static int v2_write_dquot(struct dquot *dquot)
+ {
+       int type = dquot->dq_type;
+-      struct file *filp;
+-      mm_segment_t fs;
+-      loff_t offset;
+       ssize_t ret;
+       struct v2_disk_dqblk ddquot, empty;
+       /* dq_off is guarded by dqio_sem */
+       if (!dquot->dq_off)
+               if ((ret = dq_insert_tree(dquot)) < 0) {
+-                      printk(KERN_ERR "VFS: Error %Zd occurred while creating quota.\n", ret);
++                      printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
+                       return ret;
+               }
+-      filp = sb_dqopt(dquot->dq_sb)->files[type];
+-      offset = dquot->dq_off;
+       spin_lock(&dq_data_lock);
+       mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+       /* Argh... We may need to write structure full of zeroes but that would be
+@@ -439,10 +408,8 @@ static int v2_write_dquot(struct dquot *
+       if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
+               ddquot.dqb_itime = cpu_to_le64(1);
+       spin_unlock(&dq_data_lock);
+-      fs = get_fs();
+-      set_fs(KERNEL_DS);
+-      ret = filp->f_op->write(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset);
+-      set_fs(fs);
++      ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
++            (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
+       if (ret != sizeof(struct v2_disk_dqblk)) {
+               printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
+               if (ret >= 0)
+@@ -458,7 +425,8 @@ static int v2_write_dquot(struct dquot *
+ /* Free dquot entry in data block */
+ static int free_dqentry(struct dquot *dquot, uint blk)
+ {
+-      struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
++      struct super_block *sb = dquot->dq_sb;
++      int type = dquot->dq_type;
+       struct v2_disk_dqdbheader *dh;
+       dqbuf_t buf = getdqbuf();
+       int ret = 0;
+@@ -466,34 +434,39 @@ static int free_dqentry(struct dquot *dq
+       if (!buf)
+               return -ENOMEM;
+       if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) {
+-              printk(KERN_ERR "VFS: Quota structure has offset to other block (%u) than it should (%u).\n", blk, (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
++              printk(KERN_ERR "VFS: Quota structure has offset to other "
++                "block (%u) than it should (%u).\n", blk,
++                (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
+               goto out_buf;
+       }
+-      if ((ret = read_blk(filp, blk, buf)) < 0) {
++      if ((ret = read_blk(sb, type, blk, buf)) < 0) {
+               printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
+               goto out_buf;
+       }
+       dh = (struct v2_disk_dqdbheader *)buf;
+       dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1);
+       if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
+-              if ((ret = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0 ||
+-                  (ret = put_free_dqblk(filp, dquot->dq_type, buf, blk)) < 0) {
+-                      printk(KERN_ERR "VFS: Can't move quota data block (%u) to free list.\n", blk);
++              if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
++                  (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
++                      printk(KERN_ERR "VFS: Can't move quota data block (%u) "
++                        "to free list.\n", blk);
+                       goto out_buf;
+               }
+       }
+       else {
+-              memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, sizeof(struct v2_disk_dqblk));
++              memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
++                sizeof(struct v2_disk_dqblk));
+               if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
+                       /* Insert will write block itself */
+-                      if ((ret = insert_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) {
++                      if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
+                               printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
+                               goto out_buf;
+                       }
+               }
+               else
+-                      if ((ret = write_blk(filp, blk, buf)) < 0) {
+-                              printk(KERN_ERR "VFS: Can't write quota data block %u\n", blk);
++                      if ((ret = write_blk(sb, type, blk, buf)) < 0) {
++                              printk(KERN_ERR "VFS: Can't write quota data "
++                                "block %u\n", blk);
+                               goto out_buf;
+                       }
+       }
+@@ -506,7 +479,8 @@ out_buf:
+ /* Remove reference to dquot from tree */
+ static int remove_tree(struct dquot *dquot, uint *blk, int depth)
+ {
+-      struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
++      struct super_block *sb = dquot->dq_sb;
++      int type = dquot->dq_type;
+       dqbuf_t buf = getdqbuf();
+       int ret = 0;
+       uint newblk;
+@@ -514,7 +488,7 @@ static int remove_tree(struct dquot *dqu
+       
+       if (!buf)
+               return -ENOMEM;
+-      if ((ret = read_blk(filp, *blk, buf)) < 0) {
++      if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
+               printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
+               goto out_buf;
+       }
+@@ -530,12 +504,13 @@ static int remove_tree(struct dquot *dqu
+               ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
+               for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++);  /* Block got empty? */
+               if (i == V2_DQBLKSIZE) {
+-                      put_free_dqblk(filp, dquot->dq_type, buf, *blk);
++                      put_free_dqblk(sb, type, buf, *blk);
+                       *blk = 0;
+               }
+               else
+-                      if ((ret = write_blk(filp, *blk, buf)) < 0)
+-                              printk(KERN_ERR "VFS: Can't write quota tree block %u.\n", *blk);
++                      if ((ret = write_blk(sb, type, *blk, buf)) < 0)
++                              printk(KERN_ERR "VFS: Can't write quota tree "
++                                "block %u.\n", *blk);
+       }
+ out_buf:
+       freedqbuf(buf);
+@@ -555,7 +530,6 @@ static int v2_delete_dquot(struct dquot 
+ /* Find entry in block */
+ static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
+ {
+-      struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+       dqbuf_t buf = getdqbuf();
+       loff_t ret = 0;
+       int i;
+@@ -563,27 +537,31 @@ static loff_t find_block_dqentry(struct 
+       if (!buf)
+               return -ENOMEM;
+-      if ((ret = read_blk(filp, blk, buf)) < 0) {
++      if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+               printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+               goto out_buf;
+       }
+       if (dquot->dq_id)
+-              for (i = 0; i < V2_DQSTRINBLK && le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
++              for (i = 0; i < V2_DQSTRINBLK &&
++                   le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+       else {  /* ID 0 as a bit more complicated searching... */
+               struct v2_disk_dqblk fakedquot;
+               memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
+               for (i = 0; i < V2_DQSTRINBLK; i++)
+-                      if (!le32_to_cpu(ddquot[i].dqb_id) && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
++                      if (!le32_to_cpu(ddquot[i].dqb_id) &&
++                          memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
+                               break;
+       }
+       if (i == V2_DQSTRINBLK) {
+-              printk(KERN_ERR "VFS: Quota for id %u referenced but not present.\n", dquot->dq_id);
++              printk(KERN_ERR "VFS: Quota for id %u referenced "
++                "but not present.\n", dquot->dq_id);
+               ret = -EIO;
+               goto out_buf;
+       }
+       else
+-              ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
++              ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
++                v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
+ out_buf:
+       freedqbuf(buf);
+       return ret;
+@@ -592,14 +570,13 @@ out_buf:
+ /* Find entry for given id in the tree */
+ static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
+ {
+-      struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+       dqbuf_t buf = getdqbuf();
+       loff_t ret = 0;
+       __le32 *ref = (__le32 *)buf;
+       if (!buf)
+               return -ENOMEM;
+-      if ((ret = read_blk(filp, blk, buf)) < 0) {
++      if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+               printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+               goto out_buf;
+       }
+@@ -625,16 +602,13 @@ static inline loff_t find_dqentry(struct
+ static int v2_read_dquot(struct dquot *dquot)
+ {
+       int type = dquot->dq_type;
+-      struct file *filp;
+-      mm_segment_t fs;
+       loff_t offset;
+       struct v2_disk_dqblk ddquot, empty;
+       int ret = 0;
+-      filp = sb_dqopt(dquot->dq_sb)->files[type];
+-
+ #ifdef __QUOTA_V2_PARANOIA
+-      if (!filp || !dquot->dq_sb) {   /* Invalidated quota? */
++      /* Invalidated quota? */
++      if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
+               printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
+               return -EIO;
+       }
+@@ -642,7 +616,8 @@ static int v2_read_dquot(struct dquot *d
+       offset = find_dqentry(dquot);
+       if (offset <= 0) {      /* Entry not present? */
+               if (offset < 0)
+-                      printk(KERN_ERR "VFS: Can't read quota structure for id %u.\n", dquot->dq_id);
++                      printk(KERN_ERR "VFS: Can't read quota "
++                        "structure for id %u.\n", dquot->dq_id);
+               dquot->dq_off = 0;
+               set_bit(DQ_FAKE_B, &dquot->dq_flags);
+               memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+@@ -650,12 +625,13 @@ static int v2_read_dquot(struct dquot *d
+       }
+       else {
+               dquot->dq_off = offset;
+-              fs = get_fs();
+-              set_fs(KERNEL_DS);
+-              if ((ret = filp->f_op->read(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset)) != sizeof(struct v2_disk_dqblk)) {
++              if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
++                  (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
++                  != sizeof(struct v2_disk_dqblk)) {
+                       if (ret >= 0)
+                               ret = -EIO;
+-                      printk(KERN_ERR "VFS: Error while reading quota structure for id %u.\n", dquot->dq_id);
++                      printk(KERN_ERR "VFS: Error while reading quota "
++                        "structure for id %u.\n", dquot->dq_id);
+                       memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
+               }
+               else {
+@@ -666,7 +642,6 @@ static int v2_read_dquot(struct dquot *d
+                       if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
+                               ddquot.dqb_itime = 0;
+               }
+-              set_fs(fs);
+               disk2memdqb(&dquot->dq_dqb, &ddquot);
+               if (!dquot->dq_dqb.dqb_bhardlimit &&
+                       !dquot->dq_dqb.dqb_bsoftlimit &&
+diff -puN include/linux/fs.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/fs.h
+--- 25/include/linux/fs.h~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.300106472 -0800
++++ 25-akpm/include/linux/fs.h 2004-12-03 20:56:04.319103584 -0800
+@@ -1004,6 +1004,9 @@ struct super_operations {
+       void (*umount_begin) (struct super_block *);
+
+       int (*show_options)(struct seq_file *, struct vfsmount *);
++
++      ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
++      ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+ };
+
+ /* Inode state bits.  Protected by inode_lock. */
+
+diff -puN include/linux/quota.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/quota.h
+--- 25/include/linux/quota.h~fix-of-quota-deadlock-on-pagelock-quota-core      2004-12-03 20:56:04.301106320 -0800
++++ 25-akpm/include/linux/quota.h      2004-12-03 20:56:04.320103432 -0800
+@@ -285,7 +285,7 @@ struct quota_info {
+       struct semaphore dqio_sem;              /* lock device while I/O in progress */
+       struct semaphore dqonoff_sem;           /* Serialize quotaon & quotaoff */
+       struct rw_semaphore dqptr_sem;          /* serialize ops using quota_info struct, pointers from inode to dquots */
+-      struct file *files[MAXQUOTAS];          /* fp's to quotafiles */
++      struct inode *files[MAXQUOTAS];         /* inodes of quotafiles */
+       struct mem_dqinfo info[MAXQUOTAS];      /* Information for each quota type */
+       struct quota_format_ops *ops[MAXQUOTAS];        /* Operations for each type */
+ };
+diff -puN include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/security.h
+--- 25/include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core   2004-12-03 20:56:04.303106016 -0800
++++ 25-akpm/include/linux/security.h   2004-12-03 20:56:04.322103128 -0800
+@@ -1033,7 +1033,7 @@ struct security_operations {
+       int (*sysctl) (ctl_table * table, int op);
+       int (*capable) (struct task_struct * tsk, int cap);
+       int (*quotactl) (int cmds, int type, int id, struct super_block * sb);
+-      int (*quota_on) (struct file * f);
++      int (*quota_on) (struct dentry * dentry);
+       int (*syslog) (int type);
+       int (*vm_enough_memory) (long pages);
+
+@@ -1281,9 +1281,9 @@ static inline int security_quotactl (int
+       return security_ops->quotactl (cmds, type, id, sb);
+ }
+-static inline int security_quota_on (struct file * file)
++static inline int security_quota_on (struct dentry * dentry)
+ {
+-      return security_ops->quota_on (file);
++      return security_ops->quota_on (dentry);
+ }
+ static inline int security_syslog(int type)
+@@ -1959,7 +1959,7 @@ static inline int security_quotactl (int
+       return 0;
+ }
+-static inline int security_quota_on (struct file * file)
++static inline int security_quota_on (struct dentry * dentry)
+ {
+       return 0;
+ }
+diff -puN security/dummy.c~fix-of-quota-deadlock-on-pagelock-quota-core security/dummy.c
+--- 25/security/dummy.c~fix-of-quota-deadlock-on-pagelock-quota-core   2004-12-03 20:56:04.304105864 -0800
++++ 25-akpm/security/dummy.c   2004-12-03 20:56:04.323102976 -0800
+@@ -92,7 +92,7 @@ static int dummy_quotactl (int cmds, int
+       return 0;
+ }
+-static int dummy_quota_on (struct file *f)
++static int dummy_quota_on (struct dentry *dentry)
+ {
+       return 0;
+ }
+diff -puN security/selinux/hooks.c~fix-of-quota-deadlock-on-pagelock-quota-core security/selinux/hooks.c
+--- 25/security/selinux/hooks.c~fix-of-quota-deadlock-on-pagelock-quota-core   2004-12-03 20:56:04.306105560 -0800
++++ 25-akpm/security/selinux/hooks.c   2004-12-03 20:56:04.326102520 -0800
+@@ -1494,9 +1494,9 @@ static int selinux_quotactl(int cmds, in
+       return rc;
+ }
+-static int selinux_quota_on(struct file *f)
++static int selinux_quota_on(struct dentry *dentry)
+ {
+-      return file_has_perm(current, f, FILE__QUOTAON);
++      return dentry_has_perm(current, NULL, dentry, FILE__QUOTAON);
+ }
+ static int selinux_syslog(int type)
+_