Whamcloud - gitweb
LU-1182 ldiskfs-osd: space accounting support
author Mikhail Pershin <tappro@whamcloud.com>
Tue, 19 Jun 2012 19:49:35 +0000 (23:49 +0400)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 29 Jun 2012 18:45:42 +0000 (14:45 -0400)
Add space accounting support to ldiskfs OSD.

This patch also sets initial attributes in do_create().
mdd_attr_set_internal() from mdd_object_initialize() is kept until
EDQUOT is returned in lquota itself.
Attributes of new inodes are now initialized in osd_object_create().
All LA_MODE bits are now passed to ldiskfs_create_inode().
(original patch from LiWei, see ORI-46)

Signed-off-by: Johann Lombardi <johann@whamcloud.com>
Change-Id: I77a621c76343c2633810bb3cef9859ee30b7b23a
Reviewed-on: http://review.whamcloud.com/3160
Reviewed-by: Niu Yawei <niu@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
15 files changed:
ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
lustre/osd-ldiskfs/Makefile.in
lustre/osd-ldiskfs/autoMakefile.am
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_oi.c
lustre/osd-ldiskfs/osd_quota.c [new file with mode: 0644]
lustre/osd-ldiskfs/osd_quota_fmt.c [new file with mode: 0644]
lustre/osd-ldiskfs/osd_quota_fmt.h [new file with mode: 0644]
lustre/quota/lquota_lib.c

diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch b/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch
new file mode 100644 (file)
index 0000000..4c0a7f5
--- /dev/null
@@ -0,0 +1,92 @@
+commit 21f976975cbecbdaf23ceeacc1cab2b1c05a028e
+Author: Jan Kara <jack@suse.cz>
+Date:   Mon Apr 4 15:33:39 2011 -0400
+
+    ext4: remove unnecessary [cm]time update of quota file
+
+    It is not necessary to update [cm]time of quota file on each quota
+    file write and it wastes journal space and IO throughput with inode
+    writes. So just remove the updating from ext4_quota_write() and only
+    update times when quotas are being turned off. Userspace cannot get
+    anything reliable from quota files while they are used by the kernel
+    anyway.
+
+    Signed-off-by: Jan Kara <jack@suse.cz>
+    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h       2012-06-26 11:26:25.000000000 +0200
++++ linux-stage/fs/ext4/ext4_jbd2.h    2012-06-26 11:35:31.025105000 +0200
+@@ -88,8 +88,8 @@
+ #ifdef CONFIG_QUOTA
+ /* Amount of blocks needed for quota update - we know that the structure was
+- * allocated so we need to update only inode+data */
+-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
++ * allocated so we need to update only data block */
++#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
+ /* Amount of blocks needed for quota insert/delete - we do some block writes
+  * but inode, sb and group updates are done only once */
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2012-06-26 11:35:09.000000000 +0200
++++ linux-stage/fs/ext4/super.c        2012-06-26 11:37:30.905374000 +0200
+@@ -4582,6 +4582,7 @@ static int ext4_quota_on(struct super_bl
+ static int ext4_quota_off(struct super_block *sb, int type, int remount)
+ {
+       struct quota_info *dqopt = sb_dqopt(sb);
++      int                cnt;
+       mutex_lock(&dqopt->dqonoff_mutex);
+       if (!sb_any_quota_loaded(sb)) {
+@@ -4598,6 +4599,37 @@ static int ext4_quota_off(struct super_b
+               up_read(&sb->s_umount);
+       }
++      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++              struct inode      *inode;
++              handle_t          *handle;
++
++              if (type != -1 && cnt != type)
++                      continue;
++
++              mutex_lock(&dqopt->dqonoff_mutex);
++              inode = dqopt->files[cnt];
++              if (!sb_has_quota_loaded(sb, cnt) || !inode) {
++                      mutex_unlock(&dqopt->dqonoff_mutex);
++                      continue;
++              }
++
++              inode = igrab(inode);
++              mutex_unlock(&dqopt->dqonoff_mutex);
++
++              if (!inode)
++                      continue;
++
++              /* Update modification times of quota files when userspace can
++               * start looking at them */
++              handle = ext4_journal_start(inode, 1);
++              if (!IS_ERR(handle)) {
++                      inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++                      ext4_mark_inode_dirty(handle, inode);
++                      ext4_journal_stop(handle);
++              }
++              iput(inode);
++      }
++
+       return vfs_quota_off(sb, type, remount);
+ }
+@@ -4696,9 +4728,8 @@ out:
+       if (inode->i_size < off + len) {
+               i_size_write(inode, off + len);
+               EXT4_I(inode)->i_disksize = inode->i_size;
++              ext4_mark_inode_dirty(handle, inode);
+       }
+-      inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+-      ext4_mark_inode_dirty(handle, inode);
+       mutex_unlock(&inode->i_mutex);
+       return len;
+ }
diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch b/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch
new file mode 100644 (file)
index 0000000..5452398
--- /dev/null
@@ -0,0 +1,399 @@
+From: Aditya Kali <adityakali@google.com>
+
+This patch is an attempt towards supporting quotas as a first-class
+feature in ext4. It is based on the proposal at:
+https://ext4.wiki.kernel.org/index.php/Design_For_1st_Class_Quota_in_Ext4
+This patch introduces a new feature - EXT4_FEATURE_RO_COMPAT_QUOTA which, when
+turned on, enables quota accounting at mount time itself. Also, the
+quota inodes are stored in two additional superblock fields.
+Some changes introduced by this patch that should be pointed out are:
+1) Two new ext4-superblock fields - s_usr_quota_inum and s_grp_quota_inum
+   for storing the quota inodes in use.
+2) If the QUOTA feature and corresponding quota inodes are set in superblock,
+   Quotas are turned on at mount time irrespective of the quota mount options.
+   Thus the mount options 'quota', 'usrquota' and 'grpquota' are completely
+   ignored with the new QUOTA feature flag.
+3) Default quota inodes are: inode#3 for tracking userquota and inode#4 for
+   tracking group quota. The superblock fields can be set to use other inodes
+   as well.
+4) mke2fs or tune2fs will initialize these inodes when quota feature is
+   being set. The default reserved inodes will not be visible to user as
+   regular files.
+5) Once quotas are turned on, they cannot be turned off while the FS is
+   mounted. This is because we do not want to let the quota get inconsistent.
+6) With the QUOTA feature set, since the quota inodes are hidden, some of the
+   utilities from quota-tools will no longer work correctly. Instead, e2fsprogs
+   will include support for fixing the quota files.
+7) Support is only for the new V2 quota file format.
+
+Signed-off-by: Aditya Kali <adityakali@google.com>
+---
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2012-06-26 11:26:23.345235745 +0200
++++ linux-stage/fs/ext4/ext4.h 2012-06-26 11:37:38.250355000 +0200
+@@ -162,6 +162,8 @@ typedef struct ext4_io_end {
+  */
+ #define       EXT4_BAD_INO             1      /* Bad blocks inode */
+ #define EXT4_ROOT_INO          2      /* Root inode */
++#define EXT4_USR_QUOTA_INO       3      /* User quota inode */
++#define EXT4_GRP_QUOTA_INO       4      /* Group quota inode */
+ #define EXT4_BOOT_LOADER_INO   5      /* Boot loader inode */
+ #define EXT4_UNDEL_DIR_INO     6      /* Undelete directory inode */
+ #define EXT4_RESIZE_INO                7      /* Reserved group descriptors inode */
+@@ -1016,7 +1018,9 @@ struct ext4_super_block {
+       __u8    s_last_error_func[32];  /* function where the error happened */
+ #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
+       __u8    s_mount_opts[64];
+-      __le32  s_reserved[112];        /* Padding to the end of the block */
++      __le32  s_usr_quota_inum;       /* inode for tracking user quota */
++      __le32  s_grp_quota_inum;       /* inode for tracking group quota */
++      __le32  s_reserved[110];        /* Padding to the end of the block */
+ };
+ #ifdef __KERNEL__
+@@ -1090,6 +1094,7 @@ struct ext4_sb_info {
+ #ifdef CONFIG_QUOTA
+       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
+       int s_jquota_fmt;                       /* Format of quota to use */
++      unsigned long s_qf_inums[MAXQUOTAS];    /* Quota file inodes */
+ #endif
+       unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+       struct rb_root system_blks;
+@@ -1189,6 +1194,8 @@ static inline struct timespec ext4_curre
+ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
+ {
+       return ino == EXT4_ROOT_INO ||
++              ino == EXT4_USR_QUOTA_INO ||
++              ino == EXT4_GRP_QUOTA_INO ||
+               ino == EXT4_JOURNAL_INO ||
+               ino == EXT4_RESIZE_INO ||
+               (ino >= EXT4_FIRST_INO(sb) &&
+@@ -1293,6 +1300,7 @@ EXT4_INODE_BIT_FNS(state, state_flags)
+ #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM               0x0010
+ #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK      0x0020
+ #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE    0x0040
++#define EXT4_FEATURE_RO_COMPAT_QUOTA          0x0100
+ #define EXT4_FEATURE_INCOMPAT_COMPRESSION     0x0001
+ #define EXT4_FEATURE_INCOMPAT_FILETYPE                0x0002
+@@ -1325,7 +1333,8 @@ EXT4_INODE_BIT_FNS(state, state_flags)
+                                        EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
+                                        EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
+                                        EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
+-                                       EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
++                                       EXT4_FEATURE_RO_COMPAT_HUGE_FILE| \
++                                       EXT4_FEATURE_RO_COMPAT_QUOTA)
+ /*
+  * Default values for user and/or group using reserved blocks
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h       2012-06-26 11:35:31.025105000 +0200
++++ linux-stage/fs/ext4/ext4_jbd2.h    2012-06-26 11:37:38.250631000 +0200
+@@ -89,14 +89,20 @@
+ #ifdef CONFIG_QUOTA
+ /* Amount of blocks needed for quota update - we know that the structure was
+  * allocated so we need to update only data block */
+-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
++#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
++              EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
++              1 : 0)
+ /* Amount of blocks needed for quota insert/delete - we do some block writes
+  * but inode, sb and group updates are done only once */
+-#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+-              (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
++#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
++              EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
++              (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
++               +3+DQUOT_INIT_REWRITE) : 0)
+-#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+-              (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
++#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
++              EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
++              (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
++               +3+DQUOT_DEL_REWRITE) : 0)
+ #else
+ #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2012-06-26 11:37:30.905374000 +0200
++++ linux-stage/fs/ext4/super.c        2012-06-26 11:38:30.997488000 +0200
+@@ -86,6 +86,11 @@ wait_queue_head_t aio_wq[WQ_HASH_SZ];
+ static int bigendian_extents;
++#ifdef CONFIG_QUOTA
++static int ext4_acct_on(struct super_block *sb);
++static int ext4_acct_off(struct super_block *sb);
++#endif
++
+ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
+                              struct ext4_group_desc *bg)
+ {
+@@ -670,6 +675,12 @@ static void ext4_put_super(struct super_
+       ext4_unregister_li_request(sb);
++#ifdef CONFIG_QUOTA
++      /* disable usage tracking which was enabled at mount time */
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
++              ext4_acct_off(sb);
++#endif
++
+       flush_workqueue(sbi->dio_unwritten_wq);
+       destroy_workqueue(sbi->dio_unwritten_wq);
+@@ -2142,14 +2153,22 @@ static void ext4_orphan_cleanup(struct s
+ #ifdef CONFIG_QUOTA
+       /* Needed for iput() to work correctly and not trash data */
+       sb->s_flags |= MS_ACTIVE;
+-      /* Turn on quotas so that they are updated correctly */
+-      for (i = 0; i < MAXQUOTAS; i++) {
+-              if (EXT4_SB(sb)->s_qf_names[i]) {
+-                      int ret = ext4_quota_on_mount(sb, i);
+-                      if (ret < 0)
+-                              ext4_msg(sb, KERN_ERR,
+-                                      "Cannot turn on journaled "
+-                                      "quota: error %d", ret);
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++              int ret;
++              ret = ext4_acct_on(sb);
++              if (ret)
++                      ext4_msg(sb, KERN_ERR, "Failed to turn on usage "
++                               "tracking for quota: error %d", ret);
++      } else {
++              /* Turn on quotas so that they are updated correctly */
++              for (i = 0; i < MAXQUOTAS; i++) {
++                      if (EXT4_SB(sb)->s_qf_names[i]) {
++                              int ret = ext4_quota_on_mount(sb, i);
++                              if (ret < 0)
++                                      ext4_msg(sb, KERN_ERR,
++                                              "Cannot turn on journaled "
++                                              "quota: error %d", ret);
++                      }
+               }
+       }
+ #endif
+@@ -2193,10 +2212,14 @@ static void ext4_orphan_cleanup(struct s
+               ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+                      PLURAL(nr_truncates));
+ #ifdef CONFIG_QUOTA
+-      /* Turn quotas off */
+-      for (i = 0; i < MAXQUOTAS; i++) {
+-              if (sb_dqopt(sb)->files[i])
+-                      vfs_quota_off(sb, i, 0);
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++              ext4_acct_off(sb);
++      } else {
++              /* Turn quotas off */
++              for (i = 0; i < MAXQUOTAS; i++) {
++                      if (sb_dqopt(sb)->files[i])
++                              vfs_quota_off(sb, i, 0);
++              }
+       }
+ #endif
+       sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+@@ -3395,6 +3418,15 @@ static int ext4_fill_super(struct super_
+ #ifdef CONFIG_QUOTA
+       sb->s_qcop = &ext4_qctl_operations;
+       sb->dq_op = &ext4_quota_operations;
++
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++              /* Use new qctl operations with quota on function that does not
++               * require user specified quota file path. */
++              sb->s_qcop = &ext4_qctl_operations;
++
++              sbi->s_qf_inums[USRQUOTA] = es->s_usr_quota_inum;
++              sbi->s_qf_inums[GRPQUOTA] = es->s_grp_quota_inum;
++      }
+ #endif
+       INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+       mutex_init(&sbi->s_orphan_lock);
+@@ -3622,8 +3654,31 @@ no_journal:
+       } else
+               descr = "out journal";
+-      ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+-               "Opts: %s%s", descr, sbi->s_es->s_mount_opts,
++#ifdef CONFIG_QUOTA
++      /* Enable space tracking during mount, enforcement can be
++       * enabled/disabled later with quota_on/off */
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
++          !(sb->s_flags & MS_RDONLY)) {
++              ret = ext4_acct_on(sb);
++              if (ret) {
++                      ext4_msg(sb, KERN_ERR, "Can't enable usage tracking on "
++                               "a filesystem with the QUOTA feature set");
++                      goto failed_mount4;
++              }
++      }
++#else
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
++          !(sb->s_flags & MS_RDONLY))
++              ext4_msg(sb, KERN_WARNING, "Mounting a filesystem with the "
++                       "QUOTA feature set whereas the kernel does not "
++                       "support quota, e2fsck will be required to fix usage "
++                       "information");
++
++#endif  /* CONFIG_QUOTA */
++
++      ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. quota=%s. "
++               "Opts: %s%s", descr, sb_any_quota_loaded(sb) ? "on" : "off",
++               sbi->s_es->s_mount_opts,
+                *sbi->s_es->s_mount_opts ? "; " : "");
+       lock_kernel();
+@@ -3981,6 +4036,12 @@ static int ext4_commit_super(struct supe
+                                       &EXT4_SB(sb)->s_freeblocks_counter));
+       es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
+                                       &EXT4_SB(sb)->s_freeinodes_counter));
++#ifdef CONFIG_QUOTA
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++              es->s_usr_quota_inum = EXT4_SB(sb)->s_qf_inums[USRQUOTA];
++              es->s_grp_quota_inum = EXT4_SB(sb)->s_qf_inums[GRPQUOTA];
++      }
++#endif
+       sb->s_dirt = 0;
+       BUFFER_TRACE(sbh, "marking dirty");
+       mark_buffer_dirty(sbh);
+@@ -4531,6 +4592,22 @@ static int ext4_quota_on(struct super_bl
+       int err;
+       struct path path;
++      /* When QUOTA feature is set, quota on enables enforcement, accounting
++       * being already enabled at mount time */
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++              struct inode *qf_inode;
++
++              if (!EXT4_SB(sb)->s_qf_inums[type])
++                      return -EINVAL;
++              qf_inode = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[type]);
++              if (IS_ERR(qf_inode))
++                      return PTR_ERR(qf_inode);
++              err = vfs_quota_enable(qf_inode, type, QFMT_VFS_V1,
++                                     DQUOT_LIMITS_ENABLED);
++              iput(qf_inode);
++              return err;
++      }
++
+       if (!test_opt(sb, QUOTA))
+               return -EINVAL;
+       /* When remounting, no checks are needed and in fact, name is NULL */
+@@ -4630,9 +4707,114 @@ static int ext4_quota_off(struct super_b
+               iput(inode);
+       }
++      /* When QUOTA feature is set, quota off just disables enforcement but
++       * leaves accounting on */
++      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
++              return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
++
+       return vfs_quota_off(sb, type, remount);
+ }
++/*
++ * New quota_on function that is used to turn accounting on when QUOTA
++ * feature is set.
++ */
++static int ext4_acct_on(struct super_block *sb)
++{
++      struct inode *qf_inode[MAXQUOTAS];
++      int           rc;
++
++      if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
++          !EXT4_SB(sb)->s_qf_inums[USRQUOTA] ||
++          !EXT4_SB(sb)->s_qf_inums[GRPQUOTA])
++              return -EINVAL;
++
++      qf_inode[USRQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[USRQUOTA]);
++      if (IS_ERR(qf_inode[USRQUOTA])) {
++              EXT4_SB(sb)->s_qf_inums[USRQUOTA] = 0;
++              return PTR_ERR(qf_inode[USRQUOTA]);
++      }
++      qf_inode[GRPQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[GRPQUOTA]);
++      if (IS_ERR(qf_inode[GRPQUOTA])) {
++              iput(qf_inode[USRQUOTA]);
++              EXT4_SB(sb)->s_qf_inums[GRPQUOTA] = 0;
++              return PTR_ERR(qf_inode[GRPQUOTA]);
++      }
++
++      /*
++       * When we journal data on quota file, we have to flush journal to see
++       * all updates to the file when we bypass pagecache...
++       */
++      if (EXT4_SB(sb)->s_journal) {
++              /*
++               * We don't need to lock updates but journal_flush() could
++               * otherwise be livelocked...
++               */
++              jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
++              rc = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
++              jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
++              if (rc) {
++                      iput(qf_inode[USRQUOTA]);
++                      iput(qf_inode[GRPQUOTA]);
++                      return rc;
++              }
++      }
++
++      /* only enable quota accounting by default */
++      rc = vfs_quota_enable(qf_inode[USRQUOTA], USRQUOTA, QFMT_VFS_V1,
++                            DQUOT_USAGE_ENABLED);
++      iput(qf_inode[USRQUOTA]);
++      if (rc) {
++              iput(qf_inode[GRPQUOTA]);
++              return rc;
++      }
++      rc = vfs_quota_enable(qf_inode[GRPQUOTA], GRPQUOTA, QFMT_VFS_V1,
++                            DQUOT_USAGE_ENABLED);
++      iput(qf_inode[GRPQUOTA]);
++      return rc;
++}
++
++/*
++ * New quota_off function that is used to turn accounting off when QUOTA
++ * feature is set.
++ */
++static int ext4_acct_off(struct super_block *sb)
++{
++      int type, rc = 0;
++
++      if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
++              return -EINVAL;
++
++      for (type = 0; type < MAXQUOTAS; type++) {
++              struct inode *inode = sb_dqopt(sb)->files[type];
++              handle_t     *handle;
++
++              if (!inode)
++                      continue;
++              /* Update modification times of quota files when userspace can
++               * start looking at them */
++              handle = ext4_journal_start(inode, 1);
++              if (IS_ERR(handle))
++                      goto out;
++
++              inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++              ext4_mark_inode_dirty(handle, inode);
++              ext4_journal_stop(handle);
++      }
++
++out:
++      for (type = 0; type < MAXQUOTAS; type++) {
++              int ret;
++              ret = vfs_quota_disable(sb, type,
++                                  DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
++              if (!rc && ret)
++                      rc = ret;
++      }
++      return rc;
++}
++
++
++
+ /* Read data from quotafile - avoid pagecache and such because we cannot afford
+  * acquiring the locks... As quota files are never truncated and quota code
+  * itself serializes the operations (and noone else should touch the files)
diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch b/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch
new file mode 100644 (file)
index 0000000..d72dd05
--- /dev/null
@@ -0,0 +1,62 @@
+commit ca0e05e4b15193aeba72b995e90de990db7f8304
+Author: Dmitry Monakhov <dmonakhov@openvz.org>
+Date:   Sun Aug 1 17:48:36 2010 -0400
+
+    ext4: force block allocation on quota_off
+    
+    Perform full sync procedure so that any delayed allocation blocks are
+    allocated so quota will be consistent.
+    
+    Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2012-06-26 09:37:06.039508000 +0200
++++ linux-stage/fs/ext4/super.c        2012-06-26 11:35:09.824099000 +0200
+@@ -1104,6 +1104,7 @@ static int ext4_mark_dquot_dirty(struct
+ static int ext4_write_info(struct super_block *sb, int type);
+ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
+                               char *path, int remount);
++static int ext4_quota_off(struct super_block *sb, int type, int remount);
+ static int ext4_quota_on_mount(struct super_block *sb, int type);
+ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
+                              size_t len, loff_t off);
+@@ -1173,7 +1174,7 @@ static const struct dquot_operations ext
+ static const struct quotactl_ops ext4_qctl_operations = {
+       .quota_on       = ext4_quota_on,
+-      .quota_off      = vfs_quota_off,
++      .quota_off      = ext4_quota_off,
+       .quota_sync     = vfs_quota_sync,
+       .get_info       = vfs_get_dqinfo,
+       .set_info       = vfs_set_dqinfo,
+@@ -4578,6 +4579,28 @@ static int ext4_quota_on(struct super_bl
+       return err;
+ }
++static int ext4_quota_off(struct super_block *sb, int type, int remount)
++{
++      struct quota_info *dqopt = sb_dqopt(sb);
++
++      mutex_lock(&dqopt->dqonoff_mutex);
++      if (!sb_any_quota_loaded(sb)) {
++              /* nothing to do */
++              mutex_unlock(&dqopt->dqonoff_mutex);
++              return 0;
++      }
++      mutex_unlock(&dqopt->dqonoff_mutex);
++
++      /* Force all delayed allocation blocks to be allocated. */
++      if (test_opt(sb, DELALLOC)) {
++              down_read(&sb->s_umount);
++              sync_filesystem(sb);
++              up_read(&sb->s_umount);
++      }
++
++      return vfs_quota_off(sb, type, remount);
++}
++
+ /* Read data from quotafile - avoid pagecache and such because we cannot afford
+  * acquiring the locks... As quota files are never truncated and quota code
+  * itself serializes the operations (and noone else should touch the files)
diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch
new file mode 100644 (file)
index 0000000..1e98c8f
--- /dev/null
@@ -0,0 +1,20 @@
+Index: linux-2.6.18-238.12.1/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.18-238.12.1.orig/fs/ext4/ext4.h  2011-09-21 17:55:44.627741549 +0200
++++ linux-2.6.18-238.12.1/fs/ext4/ext4.h       2011-09-21 18:05:20.974106450 +0200
+@@ -971,6 +971,7 @@
+ #ifdef CONFIG_QUOTA
+       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
+       int s_jquota_fmt;                       /* Format of quota to use */
++      unsigned long s_qf_inums[MAXQUOTAS];    /* Quota file inodes */
+ #endif
+       unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+       struct rb_root system_blks;
+@@ -1171,6 +1172,7 @@
+ #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM               0x0010
+ #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK      0x0020
+ #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE    0x0040
++#define EXT4_FEATURE_RO_COMPAT_QUOTA          0x0100
+ #define EXT4_FEATURE_INCOMPAT_COMPRESSION     0x0001
+ #define EXT4_FEATURE_INCOMPAT_FILETYPE                0x0002
index 0cc00f9..abf7009 100644 (file)
@@ -38,3 +38,4 @@ ext4-vmalloc-rhel5.patch
 ext4-mballoc-group_check-rhel5.patch
 ext4-journal-callback-rhel5.patch
 ext4-store-tree-generation-at-find.patch
+ext4-quota-minimal-rhel5.patch
index 1dcd44e..416916f 100644 (file)
@@ -35,3 +35,6 @@ ext4-vmalloc-rhel6.patch
 ext4-journal-callback.patch
 ext4-store-tree-generation-at-find.patch
 ext4_pdirop-rhel6.patch
+ext4-quota-force-block-alloc-quotaoff.patch
+ext4-quota-dont-update-cmtime.patch
+ext4-quota-first-class.patch
index 179bf2f..fda3f84 100644 (file)
@@ -1,7 +1,7 @@
 MODULES := osd_ldiskfs
 osd_ldiskfs-objs := osd_handler.o osd_oi.o osd_igif.o osd_lproc.o osd_iam.o \
                    osd_iam_lfix.o osd_iam_lvar.o osd_io.o osd_compat.o \
-                   osd_scrub.o
+                   osd_scrub.o osd_quota.o osd_quota_fmt.o
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
 
index 3294c28..2952a75 100644 (file)
@@ -40,4 +40,4 @@ endif
 
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
 EXTRA_DIST := $(osd_ldiskfs-objs:%.o=%.c) osd_internal.h osd_oi.h osd_igif.h \
-             osd_iam.h osd_scrub.h
+             osd_iam.h osd_scrub.h osd_quota_fmt.h
index ed4ce8d..eced8db 100644 (file)
@@ -72,6 +72,8 @@
 
 /* llo_* api support */
 #include <md_object.h>
+/* dt_acct_features */
+#include <lquota.h>
 
 #ifdef HAVE_LDISKFS_PDO
 int ldiskfs_pdo = 1;
@@ -1501,6 +1503,32 @@ static int osd_inode_setattr(const struct lu_env *env,
         return 0;
 }
 
+static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr)
+{
+       if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) ||
+           (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) {
+               struct iattr    iattr;
+               int             rc;
+
+               iattr.ia_valid = 0;
+               if (attr->la_valid & LA_UID)
+                       iattr.ia_valid |= ATTR_UID;
+               if (attr->la_valid & LA_GID)
+                       iattr.ia_valid |= ATTR_GID;
+               iattr.ia_uid = attr->la_uid;
+               iattr.ia_gid = attr->la_gid;
+
+               rc = ll_vfs_dq_transfer(inode, &iattr);
+               if (rc) {
+                       CERROR("%s: quota transfer failed: rc = %d. Is quota "
+                              "enforcement enabled on the ldiskfs filesystem?",
+                              inode->i_sb->s_id, rc);
+                       return rc;
+               }
+       }
+       return 0;
+}
+
 static int osd_attr_set(const struct lu_env *env,
                         struct dt_object *dt,
                         const struct lu_attr *attr,
@@ -1521,28 +1549,34 @@ static int osd_attr_set(const struct lu_env *env,
         OSD_EXEC_OP(handle, attr_set);
 
         inode = obj->oo_inode;
+       if (LDISKFS_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                         LDISKFS_FEATURE_RO_COMPAT_QUOTA)) {
+               rc = osd_quota_transfer(inode, attr);
+               if (rc)
+                       return rc;
+       } else {
 #ifdef HAVE_QUOTA_SUPPORT
-        if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) ||
-            (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) {
-                struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt;
-                struct iattr iattr;
-                int rc;
-
-                iattr.ia_valid = 0;
-                if (attr->la_valid & LA_UID)
-                        iattr.ia_valid |= ATTR_UID;
-                if (attr->la_valid & LA_GID)
-                        iattr.ia_valid |= ATTR_GID;
-                iattr.ia_uid = attr->la_uid;
-                iattr.ia_gid = attr->la_gid;
-                osd_push_ctxt(env, save);
-                rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
-                osd_pop_ctxt(save);
-                if (rc != 0)
-                        return rc;
-        }
+               if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) ||
+                   (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) {
+                       struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt;
+                       struct           iattr iattr;
+                       int              rc;
+
+                       iattr.ia_valid = 0;
+                       if (attr->la_valid & LA_UID)
+                               iattr.ia_valid |= ATTR_UID;
+                       if (attr->la_valid & LA_GID)
+                               iattr.ia_valid |= ATTR_GID;
+                       iattr.ia_uid = attr->la_uid;
+                       iattr.ia_gid = attr->la_gid;
+                       osd_push_ctxt(env, save);
+                       rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
+                       osd_pop_ctxt(save);
+                       if (rc != 0)
+                               return rc;
+               }
 #endif
-
+       }
         cfs_spin_lock(&obj->oo_guard);
         rc = osd_inode_setattr(env, inode, attr);
         cfs_spin_unlock(&obj->oo_guard);
@@ -1552,26 +1586,6 @@ static int osd_attr_set(const struct lu_env *env,
         return rc;
 }
 
-/*
- * Object creation.
- *
- * XXX temporary solution.
- */
-static int osd_create_pre(struct osd_thread_info *info, struct osd_object *obj,
-                          struct lu_attr *attr, struct thandle *th)
-{
-        return 0;
-}
-
-static int osd_create_post(struct osd_thread_info *info, struct osd_object *obj,
-                           struct lu_attr *attr, struct thandle *th)
-{
-        osd_object_init0(obj);
-        if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
-                unlock_new_inode(obj->oo_inode);
-        return 0;
-}
-
 struct dentry *osd_child_dentry_get(const struct lu_env *env,
                                     struct osd_object *obj,
                                     const char *name, const int namelen)
@@ -1681,7 +1695,7 @@ static int osd_mk_index(struct osd_thread_info *info, struct osd_object *obj,
         struct osd_thandle *oth;
         const struct dt_index_features *feat = dof->u.dof_idx.di_feat;
 
-        __u32 mode = (attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX));
+        __u32 mode = (attr->la_mode & (S_IFMT | S_IALLUGO | S_ISVTX));
 
         LASSERT(S_ISREG(attr->la_mode));
 
@@ -1716,7 +1730,7 @@ static int osd_mkreg(struct osd_thread_info *info, struct osd_object *obj,
 {
         LASSERT(S_ISREG(attr->la_mode));
         return osd_mkfile(info, obj, (attr->la_mode &
-                               (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th);
+                               (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th);
 }
 
 static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj,
@@ -1727,7 +1741,7 @@ static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj,
 {
         LASSERT(S_ISLNK(attr->la_mode));
         return osd_mkfile(info, obj, (attr->la_mode &
-                              (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th);
+                              (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th);
 }
 
 static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj,
@@ -1736,7 +1750,7 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj,
                      struct dt_object_format *dof,
                      struct thandle *th)
 {
-        cfs_umode_t mode = attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX);
+        cfs_umode_t mode = attr->la_mode & (S_IFMT | S_IALLUGO | S_ISVTX);
         int result;
 
         LINVRNT(osd_invariant(obj));
@@ -1747,6 +1761,10 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj,
         result = osd_mkfile(info, obj, mode, hint, th);
         if (result == 0) {
                 LASSERT(obj->oo_inode != NULL);
+               /*
+                * This inode should be marked dirty for i_rdev.  Currently
+                * that is done in the osd_attr_init().
+                */
                 init_special_inode(obj->oo_inode, mode, attr->la_rdev);
         }
         LINVRNT(osd_invariant(obj));
@@ -1798,6 +1816,51 @@ static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah,
         ah->dah_mode = child_mode;
 }
 
+/**
+ * Apply the caller-supplied initial attributes to the inode of a newly
+ * created object.
+ *
+ * LA_TYPE and LA_MODE are always masked out, LA_RDEV is masked out unless
+ * the object is a DFT_NODE, and timestamps that already match the inode are
+ * skipped.  Whatever remains in la_valid is handed to osd_inode_setattr().
+ * attr->la_valid is only modified temporarily: the caller's original mask
+ * is restored on every exit path.
+ *
+ * \param info - is the osd thread info passed by the caller
+ * \param obj  - is the object whose inode (oo_inode) receives the attributes
+ * \param attr - is the set of attributes to apply
+ * \param dof  - is the format of the object being created
+ */
+static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj,
+                         struct lu_attr *attr, struct dt_object_format *dof)
+{
+       struct inode   *inode = obj->oo_inode;
+       __u64           valid = attr->la_valid;
+       int             result;
+
+       attr->la_valid &= ~(LA_TYPE | LA_MODE);
+
+       if (dof->dof_type != DFT_NODE)
+               attr->la_valid &= ~LA_RDEV;
+       if ((valid & LA_ATIME) && (attr->la_atime == LTIME_S(inode->i_atime)))
+               attr->la_valid &= ~LA_ATIME;
+       if ((valid & LA_CTIME) && (attr->la_ctime == LTIME_S(inode->i_ctime)))
+               attr->la_valid &= ~LA_CTIME;
+       if ((valid & LA_MTIME) && (attr->la_mtime == LTIME_S(inode->i_mtime)))
+               attr->la_valid &= ~LA_MTIME;
+
+       if (LDISKFS_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                         LDISKFS_FEATURE_RO_COMPAT_QUOTA)) {
+               result = osd_quota_transfer(inode, attr);
+               /*
+                * A failed transfer skips the attribute update altogether
+                * (this function cannot report errors), but the caller's
+                * la_valid must still be put back before leaving.
+                */
+               if (result)
+                       goto out;
+       } else {
+#ifdef HAVE_QUOTA_SUPPORT
+               attr->la_valid &= ~(LA_UID | LA_GID);
+#endif
+       }
+
+       if (attr->la_valid != 0) {
+               result = osd_inode_setattr(info->oti_env, inode, attr);
+               /*
+                * The osd_inode_setattr() should always succeed here.  The
+                * only error that could be returned is EDQUOT when we are
+                * trying to change the UID or GID of the inode. However, this
+                * should not happen since quota enforcement is no longer
+                * enabled on ldiskfs (lquota takes care of it).
+                */
+               LASSERTF(result == 0, "%d", result);
+               inode->i_sb->s_op->dirty_inode(inode);
+       }
+
+out:
+       /* hand the caller's original mask back, whatever happened above */
+       attr->la_valid = valid;
+}
+
 /**
  * Helper function for osd_object_create()
  *
@@ -1809,16 +1872,26 @@ static int __osd_object_create(struct osd_thread_info *info,
                                struct dt_object_format *dof,
                                struct thandle *th)
 {
+       int     result;
+       __u32   umask;
 
-        int result;
+       /* we drop umask so that permissions we pass are not affected */
+       umask = current->fs->umask;
+       current->fs->umask = 0;
 
-        result = osd_create_pre(info, obj, attr, th);
+       result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof,
+                                                 th);
         if (result == 0) {
-                result = osd_create_type_f(dof->dof_type)(info, obj,
-                                           attr, hint, dof, th);
-                if (result == 0)
-                        result = osd_create_post(info, obj, attr, th);
+               osd_attr_init(info, obj, attr, dof);
+               osd_object_init0(obj);
+               /* bz 24037 */
+               if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
+                       unlock_new_inode(obj->oo_inode);
         }
+
+       /* restore previous umask value */
+       current->fs->umask = umask;
+
         return result;
 }
 
@@ -1900,6 +1973,11 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt,
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
+       if (unlikely(fid_is_acct(fid)))
+               /* Quota files can't be created from the kernel any more,
+                * 'tune2fs -O quota' will take care of creating them */
+               RETURN(-EPERM);
+
         OSD_EXEC_OP(th, create);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
@@ -1958,6 +2036,9 @@ static int osd_object_destroy(const struct lu_env *env,
         LASSERT(inode);
         LASSERT(!lu_object_is_dying(dt->do_lu.lo_header));
 
+       if (unlikely(fid_is_acct(fid)))
+               RETURN(-EPERM);
+
        /* Parallel control for OI scrub. For most of cases, there is no
         * lock contention. So it will not affect unlink performance. */
        cfs_mutex_lock(&inode->i_mutex);
@@ -2107,6 +2188,11 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
+       if (unlikely(fid_is_acct(fid)))
+               /* Quota files can't be created from the kernel any more,
+                * 'tune2fs -O quota' will take care of creating them */
+               RETURN(-EPERM);
+
         OSD_EXEC_OP(th, create);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
@@ -2601,10 +2687,10 @@ static int osd_iam_container_init(const struct lu_env *env,
 static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
                          const struct dt_index_features *feat)
 {
-        int result;
-        int ea_dir = 0;
-        struct osd_object *obj = osd_dt_obj(dt);
-        struct osd_device *osd = osd_obj2dev(obj);
+       int                      result;
+       int                      skip_iam = 0;
+       struct osd_object       *obj = osd_dt_obj(dt);
+       struct osd_device       *osd = osd_obj2dev(obj);
 
         LINVRNT(osd_invariant(obj));
         LASSERT(dt_object_exists(dt));
@@ -2618,10 +2704,14 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
                         result = 0;
                 else
                         result = -ENOTDIR;
-                ea_dir = 1;
+               skip_iam = 1;
        } else if (unlikely(feat == &dt_otable_features)) {
                dt->do_index_ops = &osd_otable_ops;
                return 0;
+       } else if (feat == &dt_acct_features) {
+               dt->do_index_ops = &osd_acct_index_ops;
+               result = 0;
+               skip_iam = 1;
         } else if (!osd_has_index(obj)) {
                 struct osd_directory *dir;
 
@@ -2657,7 +2747,7 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
                 result = 0;
         }
 
-        if (result == 0 && ea_dir == 0) {
+       if (result == 0 && skip_iam == 0) {
                 if (!osd_iam_index_probe(env, obj, feat))
                         result = -ENOTDIR;
         }
@@ -4179,7 +4269,10 @@ struct lu_context_key osd_key = {
 static int osd_device_init(const struct lu_env *env, struct lu_device *d,
                            const char *name, struct lu_device *next)
 {
-        return osd_procfs_init(osd_dev(d), name);
+       struct osd_device *osd = osd_dev(d);
+
+       strncpy(osd->od_svname, name, MAX_OBD_NAME);
+       return osd_procfs_init(osd, name);
 }
 
 static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
@@ -4190,7 +4283,13 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
 
        if (o->od_fsops) {
                fsfilt_put_ops(o->od_fsops);
-       o->od_fsops = NULL;
+               o->od_fsops = NULL;
+       }
+
+       /* shutdown quota slave instance associated with the device */
+       if (o->od_quota_slave != NULL) {
+               qsd_fini(env, o->od_quota_slave);
+               o->od_quota_slave = NULL;
        }
 
        RETURN(0);
@@ -4358,10 +4457,19 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
         if (result < 0)
                 RETURN(result);
 
+       /* 2. setup quota slave instance */
+       osd->od_quota_slave = qsd_init(env, osd->od_svname, &osd->od_dt_dev,
+                                      osd->od_proc_entry);
+       if (IS_ERR(osd->od_quota_slave)) {
+               result = PTR_ERR(osd->od_quota_slave);
+               osd->od_quota_slave = NULL;
+               RETURN(result);
+       }
+
         if (!lu_device_is_md(pdev))
                 RETURN(0);
 
-        /* 2. setup local objects */
+        /* 3. setup local objects */
         result = llo_local_objects_setup(env, lu2md_dev(pdev), lu2dt_dev(dev));
         RETURN(result);
 }
index 7e573d1..b757b53 100644 (file)
 #include <obd_class.h>
 #include <lustre_disk.h>
 #include <dt_object.h>
+#include <lquota.h>
 
 #include "osd_oi.h"
 #include "osd_iam.h"
 #include "osd_scrub.h"
+#include "osd_quota_fmt.h"
 
 struct inode;
 
@@ -301,6 +303,12 @@ struct osd_device {
        cfs_mutex_t               od_otable_mutex;
        struct osd_otable_it     *od_otable_it;
        struct osd_scrub          od_scrub;
+
+       /* service name associated with the osd device */
+       char                      od_svname[MAX_OBD_NAME];
+
+       /* quota slave instance */
+       struct qsd_instance      *od_quota_slave;
 };
 
 #define OSD_TRACK_DECLARES
@@ -465,6 +473,19 @@ struct osd_it_iam {
         struct iam_iterator    oi_it;
 };
 
+/**
+ * Iterator's in-memory data structure for quota file.
+ *
+ * oiq_obj is the accounting object being iterated: osd_it_acct_init() takes
+ * a reference on it and osd_it_acct_fini() drops that reference.
+ */
+struct osd_it_quota {
+       struct osd_object       *oiq_obj;
+       /** tree blocks path to where the entry is stored; slot 0 is set to
+        *  LUSTRE_DQTREEOFF by osd_it_acct_init() */
+       uint                     oiq_blk[LUSTRE_DQTREEDEPTH];
+       /** on-disk offset for current key where quota record can be found */
+       loff_t                   oiq_offset;
+       /** identifier for current quota record; also serves as the iterator
+        *  key and as the cookie returned by osd_it_acct_store() */
+       __u64                    oiq_id;
+};
+
 #define MAX_BLOCKS_PER_PAGE (CFS_PAGE_SIZE / 512)
 
 struct osd_iobuf {
@@ -528,11 +549,13 @@ struct osd_thread_info {
 
         /** osd iterator context used for iterator session */
 
-        union {
-                struct osd_it_iam      oti_it;
-                /** ldiskfs iterator data structure, see osd_it_ea_{init, fini} */
-                struct osd_it_ea       oti_it_ea;
-        };
+       union {
+               struct osd_it_iam       oti_it;
+               /* ldiskfs iterator data structure,
+                * see osd_it_ea_{init, fini} */
+               struct osd_it_ea        oti_it_ea;
+               struct osd_it_quota     oti_it_quota;
+       };
 
         /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */
         void                  *oti_it_ea_buf;
@@ -573,6 +596,12 @@ struct osd_thread_info {
 #define OSD_FID_REC_SZ 32
         char                   oti_ldp[OSD_FID_REC_SZ];
         char                   oti_ldp2[OSD_FID_REC_SZ];
+
+       /* used by quota code */
+       union {
+               struct if_dqblk         oti_dqblk;
+               struct if_dqinfo        oti_dqinfo;
+       };
 };
 
 extern int ldiskfs_pdo;
@@ -629,6 +658,17 @@ int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic,
 int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
                   struct osd_inode_id *id);
 
+/* osd_quota_fmt.c */
+int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj,
+                      int type, uint blk, int depth, uint index,
+                      struct osd_it_quota *it);
+int walk_block_dqentry(const struct lu_env *env, struct osd_object *obj,
+                       int type, uint blk, uint index,
+                       struct osd_it_quota *it);
+loff_t find_tree_dqentry(const struct lu_env *env,
+                         struct osd_object *obj, int type,
+                         qid_t dqid, uint blk, int depth,
+                         struct osd_it_quota *it);
 /*
  * Invariants, assertions.
  */
@@ -819,5 +859,12 @@ int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack)
         return result;
 }
 
+/**
+ * Quota/Accounting handling
+ */
+extern const struct dt_index_operations osd_acct_index_ops;
+int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, struct osd_inode_id *id);
+
 #endif /* __KERNEL__ */
 #endif /* _OSD_INTERNAL_H */
index 544eabc..dfa42ab 100644 (file)
@@ -508,6 +508,9 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
                osd_id_gen(id, osd_sb(osd)->s_root->d_inode->i_ino,
                           osd_sb(osd)->s_root->d_inode->i_generation);
        } else {
+               if (unlikely(fid_is_acct(fid)))
+                       return osd_acct_obj_lookup(info, osd, fid, id);
+
                if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
                        return osd_compat_spec_lookup(info, osd, fid, id);
 
diff --git a/lustre/osd-ldiskfs/osd_quota.c b/lustre/osd-ldiskfs/osd_quota.c
new file mode 100644 (file)
index 0000000..e636c6d
--- /dev/null
@@ -0,0 +1,389 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Use is subject to license terms.
+ *
+ * Author: Johann Lombardi <johann@whamcloud.com>
+ * Author: Niu    Yawei    <niu@whamcloud.com>
+ */
+
+#include <lquota.h>
+#include "osd_internal.h"
+
+/**
+ * Helper functions to find out the quota type (USRQUOTA/GRPQUOTA) of a
+ * given object.
+ *
+ * fid2type() maps the fid of an accounting object to its quota type: the
+ * group accounting oid yields GRPQUOTA, anything else yields USRQUOTA.
+ * The fid must be an accounting fid (asserted).
+ */
+static inline int fid2type(const struct lu_fid *fid)
+{
+       LASSERT(fid_is_acct(fid));
+       return fid_oid(fid) == ACCT_GROUP_OID ? GRPQUOTA : USRQUOTA;
+}
+
+/* Same as fid2type(), starting from the dt object instead of its fid */
+static inline int obj2type(struct dt_object *obj)
+{
+       const struct lu_fid *fid = lu_object_fid(&obj->do_lu);
+
+       return fid2type(fid);
+}
+
+/**
+ * Space Accounting Management
+ */
+
+/**
+ * Map the fid of an accounting object to the inode of the matching quota
+ * file.
+ *
+ * \param info - is the osd thread info passed by the caller
+ * \param osd  - is the osd device
+ * \param fid  - is the fid of the accounting object we want to look up
+ * \param id   - is the osd_inode_id struct to fill with the inode number of
+ *               the quota file if the lookup is successful
+ *
+ * \retval 0       - success, \a id is filled in
+ * \retval -ENOENT - the quota feature is not enabled on the filesystem or
+ *                   the recorded quota inode number is not valid
+ */
+int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, struct osd_inode_id *id)
+{
+       struct super_block *sb = osd_sb(osd);
+
+       ENTRY;
+       LASSERT(fid_is_acct(fid));
+
+       if (LDISKFS_HAS_RO_COMPAT_FEATURE(sb,
+                                         LDISKFS_FEATURE_RO_COMPAT_QUOTA)) {
+               /* quota inodes are identified by number only */
+               id->oii_gen = OSD_OII_NOGEN;
+               id->oii_ino = LDISKFS_SB(sb)->s_qf_inums[fid2type(fid)];
+               if (ldiskfs_valid_inum(sb, id->oii_ino))
+                       RETURN(0);
+       }
+
+       RETURN(-ENOENT);
+}
+
+/**
+ * Return space usage (#blocks & #inodes) consumed by a given uid or gid.
+ *
+ * \param env   - is the environment passed by the caller
+ * \param dtobj - is the accounting object
+ * \param dtrec - is the record to fill with space usage information
+ * \param dtkey - is the id of the user or group for which we would
+ *                like to access disk usage, stored as a __u64.
+ * \param capa - is the capability, not used.
+ *
+ * \retval +1  - success : exact match
+ * \retval -ve - failure, as reported by the get_dqblk() quota operation
+ */
+static int osd_acct_index_lookup(const struct lu_env *env,
+                                struct dt_object *dtobj,
+                                struct dt_rec *dtrec,
+                                const struct dt_key *dtkey,
+                                struct lustre_capa *capa)
+{
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct if_dqblk         *dqblk = &info->oti_dqblk;
+       struct super_block      *sb = osd_sb(osd_obj2dev(osd_dt_obj(dtobj)));
+       struct acct_rec         *rec = (struct acct_rec *)dtrec;
+       __u64                    id = *((const __u64 *)dtkey);
+       int                      rc;
+
+       ENTRY;
+
+       /* size the clear from the buffer itself (a struct if_dqblk), not
+        * from an unrelated structure type */
+       memset(dqblk, 0, sizeof(*dqblk));
+       rc = sb->s_qcop->get_dqblk(sb, obj2type(dtobj), (qid_t) id, dqblk);
+       if (rc)
+               RETURN(rc);
+       rec->bspace = dqblk->dqb_curspace;
+       rec->ispace = dqblk->dqb_curinodes;
+       RETURN(+1);
+}
+
+/*
+ * Log a quota file read failure for the object under iterator \a it.
+ * Expands to a single CERROR() call without a trailing semicolon, so the
+ * caller's own ';' terminates the statement.
+ */
+#define QUOTA_IT_READ_ERROR(it, rc)                                    \
+       CERROR("%s: Error while trying to read quota information, "    \
+              "failed with %d\n",                                     \
+              (it)->oiq_obj->oo_dt.do_lu.lo_dev->ld_obd->obd_name, (rc))
+
+/**
+ * Set up the per-thread quota iterator for the given osd index object and
+ * take a reference on that object.
+ *
+ * \param  dt    - osd index object
+ * \param  attr  - not used
+ * \param  capa  - BYPASS_CAPA
+ *
+ * \retval iterator pointer  - success
+ * \retval ERR_PTR(-ENOMEM)  - no thread info available
+ */
+static struct dt_it *osd_it_acct_init(const struct lu_env *env,
+                                     struct dt_object *dt,
+                                     __u32 attr, struct lustre_capa *capa)
+{
+       struct osd_thread_info  *oti = osd_oti_get(env);
+       struct osd_object       *qobj = osd_dt_obj(dt);
+       struct osd_it_quota     *qit;
+
+       ENTRY;
+
+       LASSERT(lu_object_exists(&dt->do_lu));
+
+       if (oti == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+
+       /* the iterator lives in the thread info, start from a clean state */
+       qit = &oti->oti_it_quota;
+       memset(qit, 0, sizeof(*qit));
+
+       /* reference released in osd_it_acct_fini() */
+       lu_object_get(&dt->do_lu);
+       qit->oiq_obj = qobj;
+
+       /* LUSTRE_DQTREEOFF is the initial offset where the tree can be found */
+       qit->oiq_blk[0] = LUSTRE_DQTREEOFF;
+
+       /* NB: the tree depth is not stored since a leaf block is always at
+        * depth LUSTRE_DQTREEDEPTH - 1 (root has depth = 0). */
+       RETURN((struct dt_it *)qit);
+}
+
+/**
+ * Release the iterator: drop the object reference taken at init time.
+ *
+ * \param  di   - osd iterator
+ */
+static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
+{
+       struct osd_it_quota     *qit = (struct osd_it_quota *)di;
+       struct lu_object        *lo = &qit->oiq_obj->oo_dt.do_lu;
+
+       ENTRY;
+       lu_object_put(env, lo);
+       EXIT;
+}
+
+/**
+ * Move Iterator to record specified by \a key, if the \a key isn't found,
+ * move to the first valid record.
+ *
+ * \param  di   - osd iterator
+ * \param  key  - uid or gid, stored as a __u64
+ *
+ * \retval  +1      - di points to the exact matched key, or to the first
+ *                    valid record when the key isn't found
+ * \retval -ENOENT  - the key isn't found and there is no valid record
+ * \retval -ve      - failure
+ */
+static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
+                          const struct dt_key *key)
+{
+       struct osd_it_quota     *it = (struct osd_it_quota *)di;
+       const struct lu_fid     *fid =
+                               lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
+       int                      type = fid2type(fid);
+       /* Keys of accounting objects are 64-bit ids (see oiq_id and
+        * osd_acct_index_lookup()): read the whole __u64 and narrow it.
+        * Reading a qid_t straight through the pointer would pick up the
+        * wrong half of the value on big-endian hosts. */
+       qid_t                    dqid = (qid_t)*(const __u64 *)key;
+       loff_t                   offset;
+       int                      rc;
+
+       ENTRY;
+
+       offset = find_tree_dqentry(env, it->oiq_obj, type, dqid,
+                                  LUSTRE_DQTREEOFF, 0, it);
+       if (offset > 0) { /* Found */
+               RETURN(+1);
+       } else if (offset < 0) { /* Error */
+               QUOTA_IT_READ_ERROR(it, (int)offset);
+               RETURN((int)offset);
+       }
+
+       /* The @key is not found, move to the first valid entry */
+       rc = walk_tree_dqentry(env, it->oiq_obj, type, it->oiq_blk[0], 0,
+                              0, it);
+       if (rc == 0)
+               rc = 1;
+       else if (rc > 0)
+               rc = -ENOENT;
+
+       RETURN(rc);
+}
+
+/**
+ * Counterpart of osd_it_acct_get().  Nothing is held per position, so
+ * there is nothing to release.
+ *
+ * \param  di   - osd iterator
+ */
+static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
+{
+       /* intentionally empty */
+}
+
+/**
+ * Move on to the next valid entry.
+ *
+ * The remainder of the current leaf block is scanned first; if it holds no
+ * further entry, the search continues from the parent levels of the tree,
+ * starting at the slot following the one used by the current id.
+ *
+ * \param  di   - osd iterator
+ *
+ * \retval +ve  - iterator reached the end
+ * \retval   0  - iterator has not reached the end yet
+ * \retval -ve  - unexpected failure
+ */
+static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
+{
+       struct osd_it_quota     *it = (struct osd_it_quota *)di;
+       const struct lu_fid     *fid =
+                               lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
+       int                      type = fid2type(fid);
+       int                      depth, rc;
+       uint                     index;
+
+       ENTRY;
+
+       /* Let's first check if there are any remaining valid entries in the
+        * current leaf block. Start with the next entry after the current one.
+        */
+       depth = LUSTRE_DQTREEDEPTH - 1;
+       index = GETIDINDEX(it->oiq_id, depth);
+       if (++index < LUSTRE_DQSTRINBLK) {
+               /* Search for the next valid entry from current index */
+               rc = walk_block_dqentry(env, it->oiq_obj, type,
+                                       it->oiq_blk[depth], index, it);
+               if (rc < 0) {
+                       QUOTA_IT_READ_ERROR(it, rc);
+                       RETURN(rc);
+               } else if (rc == 0) {
+                       /* Found an entry, @it is already updated to the
+                        * new position in walk_block_dqentry(). */
+                       RETURN(0);
+               }
+       }
+       rc = 1;
+
+       /* We have consumed all the entries of the current leaf block, move on
+        * to the next one. */
+       depth--;
+
+       /* We keep searching as long as walk_tree_dqentry() returns +1
+        * (= no valid entry found).  A level whose next slot would be past
+        * 0xff is skipped and the search moves one level up.
+        * NOTE(review): 0xff presumably is the last reference slot of a tree
+        * block - confirm against osd_quota_fmt.h. */
+       for (; depth >= 0 && rc > 0; depth--) {
+               index = GETIDINDEX(it->oiq_id, depth);
+               if (++index > 0xff)
+                       continue;
+               rc = walk_tree_dqentry(env, it->oiq_obj, type,
+                                      it->oiq_blk[depth], depth, index, it);
+       }
+
+       if (rc < 0)
+               QUOTA_IT_READ_ERROR(it, rc);
+       RETURN(rc);
+}
+
+/**
+ * Return pointer to the key under iterator, i.e. the id (oiq_id) of the
+ * current quota record.
+ *
+ * \param  di   - osd iterator
+ */
+static struct dt_key *osd_it_acct_key(const struct lu_env *env,
+                                     const struct dt_it *di)
+{
+       struct osd_it_quota     *qit = (struct osd_it_quota *)di;
+       struct dt_key           *cur;
+
+       ENTRY;
+       cur = (struct dt_key *)&qit->oiq_id;
+       RETURN(cur);
+}
+
+/**
+ * Return size of key under iterator (in bytes), i.e. the size of the
+ * oiq_id field.
+ *
+ * \param  di   - osd iterator
+ */
+static int osd_it_acct_key_size(const struct lu_env *env,
+                               const struct dt_it *di)
+{
+       struct osd_it_quota     *qit = (struct osd_it_quota *)di;
+       int                      keylen;
+
+       ENTRY;
+       keylen = (int)sizeof(qit->oiq_id);
+       RETURN(keylen);
+}
+
+/**
+ * Fill \a dtrec with the record under iterator, by looking up the current
+ * key through osd_acct_index_lookup().
+ *
+ * \param  di    - osd iterator
+ * \param  dtrec - record to fill
+ * \param  attr  - not used
+ *
+ * \retval   0   - success
+ * \retval -ve   - failure
+ */
+static int osd_it_acct_rec(const struct lu_env *env,
+                          const struct dt_it *di,
+                          struct dt_rec *dtrec, __u32 attr)
+{
+       struct osd_it_quota     *qit = (struct osd_it_quota *)di;
+       const struct dt_key     *cur = osd_it_acct_key(env, di);
+       int                      rc;
+
+       ENTRY;
+
+       rc = osd_acct_index_lookup(env, &qit->oiq_obj->oo_dt, dtrec, cur,
+                                  BYPASS_CAPA);
+       /* the lookup reports an exact match as a positive value */
+       if (rc > 0)
+               rc = 0;
+       RETURN(rc);
+}
+
+/**
+ * Returns cookie for current Iterator position; the cookie is the id of
+ * the current quota record.
+ *
+ * \param  di    - osd iterator
+ */
+static __u64 osd_it_acct_store(const struct lu_env *env,
+                              const struct dt_it *di)
+{
+       struct osd_it_quota     *qit = (struct osd_it_quota *)di;
+       __u64                    cookie;
+
+       ENTRY;
+       cookie = qit->oiq_id;
+       RETURN(cookie);
+}
+
+/**
+ * Restore iterator from cookie by handing the cookie to osd_it_acct_get()
+ * as the key; the return value is that of osd_it_acct_get().
+ *
+ * \param  di    - osd iterator
+ * \param  hash  - iterator location cookie
+ */
+static int osd_it_acct_load(const struct lu_env *env,
+                           const struct dt_it *di, __u64 hash)
+{
+       const struct dt_key     *cookie = (const struct dt_key *)&hash;
+       int                      rc;
+
+       ENTRY;
+       rc = osd_it_acct_get(env, (struct dt_it *)di, cookie);
+       RETURN(rc);
+}
+
+/**
+ * Index and Iterator operations for accounting objects.
+ *
+ * Only lookup and the iterator methods are provided; no other index
+ * operation is set in this table.
+ */
+const struct dt_index_operations osd_acct_index_ops = {
+       .dio_lookup     = osd_acct_index_lookup,
+       .dio_it         = {
+               .init           = osd_it_acct_init,
+               .fini           = osd_it_acct_fini,
+               .get            = osd_it_acct_get,
+               .put            = osd_it_acct_put,
+               .next           = osd_it_acct_next,
+               .key            = osd_it_acct_key,
+               .key_size       = osd_it_acct_key_size,
+               .rec            = osd_it_acct_rec,
+               .store          = osd_it_acct_store,
+               .load           = osd_it_acct_load
+       }
+};
+
diff --git a/lustre/osd-ldiskfs/osd_quota_fmt.c b/lustre/osd-ldiskfs/osd_quota_fmt.c
new file mode 100644 (file)
index 0000000..4dbfa07
--- /dev/null
@@ -0,0 +1,284 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Use is subject to license terms.
+ *
+ * Lustre administrative quota format.
+ * from linux/fs/quota_v2.c
+ */
+
+#include "osd_internal.h"
+#include "osd_quota_fmt.h"
+
+/*
+ * Buffer holding one block of the quota file; getdqbuf() allocates it with
+ * LUSTRE_DQBLKSIZE bytes.
+ */
+typedef char *dqbuf_t;
+
+/*
+ * All-zero quota record. Entries read from a leaf block are compared against
+ * it with memcmp() to tell unused slots from real records.
+ */
+static const union
+{
+       struct lustre_disk_dqblk_v2 r1;
+} emptydquot = { .r1 = { 0 } };
+
+/**
+ * Allocate a buffer large enough for one quota block (LUSTRE_DQBLKSIZE
+ * bytes). A warning is logged when the allocation fails.
+ *
+ * \retval buffer pointer, or NULL if the allocation failed
+ */
+static inline dqbuf_t getdqbuf(void)
+{
+       dqbuf_t blkbuf;
+
+       blkbuf = cfs_alloc(LUSTRE_DQBLKSIZE, CFS_ALLOC_IO);
+       if (blkbuf == NULL)
+               CWARN("Not enough memory for quota buffers.\n");
+
+       return blkbuf;
+}
+
+/**
+ * Release a quota block buffer obtained from getdqbuf().
+ *
+ * \param buf - buffer to hand back to cfs_free()
+ */
+static inline void freedqbuf(dqbuf_t buf)
+{
+       cfs_free(buf);
+}
+
+/**
+ * Read the \a blk into \a buf.
+ *
+ * The buffer is zeroed first, then LUSTRE_DQBLKSIZE bytes are requested from
+ * the ->quota_read() method of the super block \a obj belongs to.
+ *
+ * TODO Will support enforcement quota later.
+ *
+ * \param env  - execution environment, not used by this function
+ * \param obj  - object whose inode gives access to the super block
+ * \param type - quota type, passed through to ->quota_read()
+ * \param blk  - number of the block to read
+ * \param buf  - destination buffer, LUSTRE_DQBLKSIZE bytes are cleared in it
+ *
+ * \retval value returned by ->quota_read(), except that -EBADR is turned
+ *         into 0
+ */
+static ssize_t quota_read_blk(const struct lu_env *env,
+                             struct osd_object *obj,
+                             int type, uint blk, dqbuf_t buf)
+{
+       struct super_block      *sb = obj->oo_inode->i_sb;
+       loff_t                   offset;
+       ssize_t                  ret;
+
+       ENTRY;
+
+       /* widen the block number before shifting, otherwise the byte offset
+        * is computed in 32-bit arithmetic and wraps for large block numbers */
+       offset = (loff_t)blk << LUSTRE_DQBLKSIZE_BITS;
+
+       memset(buf, 0, LUSTRE_DQBLKSIZE);
+       ret = sb->s_op->quota_read(sb, type, buf, LUSTRE_DQBLKSIZE, offset);
+
+       /* Reading past EOF just returns a block of zeros */
+       if (ret == -EBADR)
+               ret = 0;
+
+       RETURN(ret);
+}
+
+/**
+ * Find entry in block by given \a dqid in the leaf block \a blk
+ *
+ * When the entry is found and \a it is given, the leaf block number, the
+ * file offset of the entry and \a dqid are recorded in \a it.
+ *
+ * \param env  - execution environment
+ * \param obj  - osd object the quota block is read through
+ * \param type - quota type
+ * \param dqid - ID to look for
+ * \param blk  - number of the leaf block to search
+ * \param it   - iterator to fill in, may be NULL
+ *
+ * \retval +ve, the offset of the entry in file
+ * \retval   0, entry not found (also returned for a found entry when \a it
+ *              is NULL)
+ * \retval -ve, unexpected failure
+ */
+static loff_t find_block_dqentry(const struct lu_env *env,
+                                struct osd_object *obj, int type,
+                                qid_t dqid, uint blk,
+                                struct osd_it_quota *it)
+{
+       dqbuf_t                          buf = getdqbuf();
+       loff_t                           ret;
+       int                              i;
+       struct lustre_disk_dqblk_v2     *ddquot;
+       int                              dqblk_sz;
+
+       ENTRY;
+
+       if (!buf)
+               RETURN(-ENOMEM);
+
+       /* derive the entry array only once the buffer is known to be valid */
+       ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf);
+       dqblk_sz = sizeof(struct lustre_disk_dqblk_v2);
+
+       ret = quota_read_blk(env, obj, type, blk, buf);
+       if (ret < 0) {
+               CERROR("Can't read quota tree block %u.\n", blk);
+               GOTO(out_buf, ret);
+       }
+
+       if (dqid) {
+               for (i = 0; i < LUSTRE_DQSTRINBLK; i++)
+                       if (le32_to_cpu(ddquot[i].dqb_id) == dqid)
+                               break;
+       } else {
+               /* ID 0 needs a more careful search: a slot whose dqb_id is 0
+                * only counts as the record for ID 0 if it differs from the
+                * all-zero (empty) record */
+               for (i = 0; i < LUSTRE_DQSTRINBLK; i++)
+                       if (!le32_to_cpu(ddquot[i].dqb_id) &&
+                           memcmp((char *)&emptydquot, (char *)&ddquot[i],
+                                  dqblk_sz))
+                               break;
+       }
+
+       if (i == LUSTRE_DQSTRINBLK) {
+               CDEBUG(D_QUOTA, "Quota for id %u not found.\n", dqid);
+               ret = 0;
+               GOTO(out_buf, ret);
+       }
+
+       /* widen the block number before shifting so that the byte offset
+        * does not wrap in 32-bit arithmetic */
+       ret = ((loff_t)blk << LUSTRE_DQBLKSIZE_BITS) +
+             sizeof(struct lustre_disk_dqdbheader) + i * dqblk_sz;
+
+       if (it) {
+               it->oiq_blk[LUSTRE_DQTREEDEPTH - 1] = blk;
+               it->oiq_offset = ret;
+               it->oiq_id = dqid;
+       } else {
+               /* NOTE(review): without an iterator the offset is dropped and
+                * 0 is returned although the entry exists - confirm this is
+                * what callers expect */
+               ret = 0;
+       }
+out_buf:
+       freedqbuf(buf);
+       RETURN(ret);
+}
+
+/**
+ * Find entry for given \a dqid in the tree block \a blk
+ *
+ * The reference slot selected by GETIDINDEX(\a dqid, \a depth) is read from
+ * \a blk. If it is set, the search descends into the referenced block:
+ * recursively for inner levels, through find_block_dqentry() at the last
+ * level. When \a it is given and the entry is found, the referenced block
+ * number is recorded in it->oiq_blk[depth].
+ *
+ * \param env   - execution environment
+ * \param obj   - osd object the quota blocks are read through
+ * \param type  - quota type, selects the quota file the blocks are read from
+ * \param dqid  - ID to look for
+ * \param blk   - number of the tree block to search
+ * \param depth - tree level of \a blk
+ * \param it    - iterator to fill in, may be NULL
+ *
+ * \retval +ve, the offset of the entry in file
+ * \retval   0, entry not found
+ * \retval -ve, unexpected failure
+ */
+loff_t find_tree_dqentry(const struct lu_env *env,
+                        struct osd_object *obj, int type,
+                        qid_t dqid, uint blk, int depth,
+                        struct osd_it_quota *it)
+{
+       dqbuf_t  buf = getdqbuf();
+       loff_t   ret;
+       u32     *ref = (u32 *) buf;
+
+       ENTRY;
+
+       if (!buf)
+               RETURN(-ENOMEM);
+
+       /* read from the quota file of the requested type; a hard-coded type
+        * of 0 here would make every lookup walk the tree of type 0 */
+       ret = quota_read_blk(env, obj, type, blk, buf);
+       if (ret < 0) {
+               CERROR("Can't read quota tree block %u.\n", blk);
+               GOTO(out_buf, ret);
+       }
+       ret = 0;
+       blk = le32_to_cpu(ref[GETIDINDEX(dqid, depth)]);
+       if (!blk)               /* No reference? */
+               GOTO(out_buf, ret);
+
+       if (depth < LUSTRE_DQTREEDEPTH - 1)
+               ret = find_tree_dqentry(env, obj, type, dqid, blk,
+                                       depth + 1, it);
+       else
+               ret = find_block_dqentry(env, obj, type, dqid, blk, it);
+
+       if (it && ret > 0) /* Entry found */
+               it->oiq_blk[depth] = blk;
+out_buf:
+       freedqbuf(buf);
+       RETURN(ret);
+}
+
+/**
+ * Search from \a index within the leaf block \a blk, and fill the \a it with
+ * the first valid entry.
+ *
+ * A block whose header reports no entries is skipped without scanning.
+ * Otherwise the first slot at or after \a index that differs from the
+ * all-zero record is taken, and its block number, ID and file offset are
+ * stored in \a it.
+ *
+ * \param env   - execution environment
+ * \param obj   - osd object the quota block is read through
+ * \param type  - quota type
+ * \param blk   - number of the leaf block to scan
+ * \param index - first slot to examine, must be below LUSTRE_DQSTRINBLK
+ *                when the block has entries
+ * \param it    - iterator to fill in, dereferenced as soon as an entry is
+ *                found, so it must not be NULL
+ *
+ * \retval +ve, no valid entry found
+ * \retval   0, entry found
+ * \retval -ve, unexpected failure
+ */
+int walk_block_dqentry(const struct lu_env *env, struct osd_object *obj,
+                      int type, uint blk, uint index,
+                      struct osd_it_quota *it)
+{
+       dqbuf_t                          buf = getdqbuf();
+       int                              ret;
+       struct lustre_disk_dqdbheader   *dqhead;
+       int                              i, dqblk_sz;
+       struct lustre_disk_dqblk_v2     *ddquot;
+
+       ENTRY;
+
+       if (!buf)
+               RETURN(-ENOMEM);
+
+       dqhead = (struct lustre_disk_dqdbheader *)buf;
+       dqblk_sz = sizeof(struct lustre_disk_dqblk_v2);
+
+       ret = quota_read_blk(env, obj, type, blk, buf);
+       if (ret < 0) {
+               CERROR("Can't read quota tree block %u.\n", blk);
+               GOTO(out_buf, ret);
+       }
+       ret = 1;
+
+       /* dqdh_entries is a 16-bit field, convert it as such */
+       if (!le16_to_cpu(dqhead->dqdh_entries))
+               GOTO(out_buf, ret);
+
+       ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf);
+       LASSERT(index < LUSTRE_DQSTRINBLK);
+       for (i = index; i < LUSTRE_DQSTRINBLK; i++) {
+               /* skip empty entry */
+               if (!memcmp((char *)&emptydquot,
+                           (char *)&ddquot[i], dqblk_sz))
+                       continue;
+
+               it->oiq_blk[LUSTRE_DQTREEDEPTH - 1] = blk;
+               it->oiq_id = le32_to_cpu(ddquot[i].dqb_id);
+               /* widen the block number before shifting so that the byte
+                * offset does not wrap in 32-bit arithmetic */
+               it->oiq_offset = ((loff_t)blk << LUSTRE_DQBLKSIZE_BITS) +
+                                 sizeof(struct lustre_disk_dqdbheader) +
+                                 i * dqblk_sz;
+               ret = 0;
+               break;
+       }
+
+out_buf:
+       freedqbuf(buf);
+       RETURN(ret);
+}
+
+/**
+ * Search from \a index within the tree block \a blk, and fill the \a it
+ * with the first valid entry.
+ *
+ * Reference slots \a index .. 0xff of the block are visited in order. Each
+ * non-zero reference is followed from its first slot, recursively for inner
+ * levels and through walk_block_dqentry() at the last level, until an entry
+ * is found or an error occurs. On success the block that was followed is
+ * recorded in it->oiq_blk[depth].
+ *
+ * \param env   - execution environment
+ * \param obj   - osd object the quota blocks are read through
+ * \param type  - quota type
+ * \param blk   - number of the tree block to scan
+ * \param depth - tree level of \a blk
+ * \param index - first reference slot to examine
+ * \param it    - iterator to fill in
+ *
+ * \retval +ve, no valid entry found
+ * \retval   0, entry found
+ * \retval -ve, unexpected failure
+ */
+int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj,
+                     int type, uint blk, int depth, uint index,
+                     struct osd_it_quota *it)
+{
+       dqbuf_t  blkbuf = getdqbuf();
+       u32     *child_ref = (u32 *) blkbuf;
+       loff_t   rc;
+
+       ENTRY;
+
+       if (blkbuf == NULL)
+               RETURN(-ENOMEM);
+
+       rc = quota_read_blk(env, obj, type, blk, blkbuf);
+       if (rc < 0) {
+               CERROR("Can't read quota tree block %u.\n", blk);
+               goto out_buf;
+       }
+
+       /* nothing found so far */
+       rc = 1;
+       while (index <= 0xff && rc > 0) {
+               /* from here on blk names the referenced (child) block */
+               blk = le32_to_cpu(child_ref[index]);
+               index++;
+
+               /* No reference */
+               if (blk == 0)
+                       continue;
+
+               if (depth >= LUSTRE_DQTREEDEPTH - 1)
+                       rc = walk_block_dqentry(env, obj, type, blk, 0, it);
+               else
+                       rc = walk_tree_dqentry(env, obj, type, blk,
+                                              depth + 1, 0, it);
+       }
+
+       /* Entry found: remember which child block leads to it */
+       if (rc == 0)
+               it->oiq_blk[depth] = blk;
+out_buf:
+       freedqbuf(blkbuf);
+       RETURN(rc);
+}
diff --git a/lustre/osd-ldiskfs/osd_quota_fmt.h b/lustre/osd-ldiskfs/osd_quota_fmt.h
new file mode 100644 (file)
index 0000000..2857a7d
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Use is subject to license terms.
+ *
+ * Lustre ldiskfs quota format
+ * from include/linux/quotaio_v2.h
+ */
+#ifndef _OSD_QUOTA_FMT_H
+#define _OSD_QUOTA_FMT_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is a radix tree whose leaves point
+ * to blocks of these structures. This is the version 2 record format.
+ */
+struct lustre_disk_dqblk_v2 {
+       __u32 dqb_id;         /**< id this quota applies to */
+       __u32 padding;
+       __u64 dqb_ihardlimit; /**< absolute limit on allocated inodes */
+       __u64 dqb_isoftlimit; /**< preferred inode limit */
+       __u64 dqb_curinodes;  /**< current # allocated inodes */
+       /** absolute limit on disk space (in QUOTABLOCK_SIZE) */
+       __u64 dqb_bhardlimit;
+       /** preferred limit on disk space (in QUOTABLOCK_SIZE) */
+       __u64 dqb_bsoftlimit;
+       __u64 dqb_curspace;   /**< current space occupied (in bytes) */
+       obd_time dqb_btime;   /**< time limit for excessive disk use */
+       obd_time dqb_itime;   /**< time limit for excessive inode use */
+};
+
+/* Number of entries in one block (14 entries) */
+#define LUSTRE_DQSTRINBLK \
+               ((LUSTRE_DQBLKSIZE - sizeof(struct lustre_disk_dqdbheader)) \
+                / sizeof(struct lustre_disk_dqblk_v2))
+/*
+ * Address of the first quota entry in the block buffer \a buf, i.e. the byte
+ * right after the block header. The argument is parenthesized so that an
+ * expression can be passed safely.
+ */
+#define GETENTRIES(buf) \
+               (((char *)(buf)) + sizeof(struct lustre_disk_dqdbheader))
+
+/*
+ * Here are header structures as written on disk and their in-memory copies
+ */
+/* First generic header; its size defines LUSTRE_DQINFOOFF */
+struct lustre_disk_dqheader {
+       __u32 dqh_magic; /* Magic number identifying file */
+       __u32 dqh_version; /* File version */
+};
+
+/*
+ * Header with type and version specific information
+ * (presumably stored at LUSTRE_DQINFOOFF, right after the generic header -
+ * TODO confirm against the code writing the file)
+ */
+struct lustre_disk_dqinfo {
+       /* Time before block soft limit becomes hard limit */
+       __u32 dqi_bgrace;
+       /* Time before inode soft limit becomes hard limit */
+       __u32 dqi_igrace;
+       /* Flags for quotafile (DQF_*) */
+       __u32 dqi_flags;
+       /* Number of blocks in file */
+       __u32 dqi_blocks;
+       /* Number of first free block in the list */
+       __u32 dqi_free_blk;
+       /* Number of block with at least one free entry */
+       __u32 dqi_free_entry;
+};
+
+/*
+ *  Structure of header of block with quota structures. It is padded to
+ *  16 bytes. The number of quota entries that fit in a block behind this
+ *  header is given by LUSTRE_DQSTRINBLK.
+ */
+struct lustre_disk_dqdbheader {
+       __u32 dqdh_next_free; /* Number of next block with free entry */
+       __u32 dqdh_prev_free; /* Number of previous block with free entry */
+       __u16 dqdh_entries;   /* Number of valid entries in block */
+       __u16 dqdh_pad1;
+       __u32 dqdh_pad2;
+};
+
+/* Offset of info header in file */
+#define LUSTRE_DQINFOOFF       sizeof(struct lustre_disk_dqheader)
+/* log2 of the quota block size, used to turn block numbers into offsets */
+#define LUSTRE_DQBLKSIZE_BITS  10
+/* Size of block with quota structures */
+#define LUSTRE_DQBLKSIZE       (1 << LUSTRE_DQBLKSIZE_BITS)
+/* Offset of tree in file in blocks */
+#define LUSTRE_DQTREEOFF       1
+/* Depth of quota tree */
+#define LUSTRE_DQTREEDEPTH     4
+
+/*
+ * Index of the reference slot used for \a id at tree level \a depth: one
+ * byte of the ID per level, the most significant byte at depth 0 and the
+ * least significant one at depth LUSTRE_DQTREEDEPTH - 1.
+ */
+#define GETIDINDEX(id, depth)  (((id) >> \
+                               ((LUSTRE_DQTREEDEPTH - (depth) - 1) * 8)) & \
+                               0xff)
+#endif /* osd_quota_fmt.h */
index dfffb86..0f5d984 100644 (file)
@@ -1,5 +1,4 @@
 /*
- *
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.