From 57df6353ca590f32a32c0f0dc4a1d10a86a027ec Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Tue, 19 Jun 2012 23:49:35 +0400 Subject: [PATCH] LU-1182 ldiskfs-osd: space accounting support Add space accounting support to ldiskfs OSD. This patch also sets initial attributes in do_create(). mdd_attr_set_internal() from mdd_object_initialize() is kept until EDQUOT is returned in lquota itself. Attributes of new inodes are now initialized in osd_object_create(). All LA_MODE bits are now passed to ldiskfs_create_inode(). (original patch from LiWei, see ORI-46) Signed-off-by: Johann Lombardi Change-Id: I77a621c76343c2633810bb3cef9859ee30b7b23a Reviewed-on: http://review.whamcloud.com/3160 Reviewed-by: Niu Yawei Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger --- .../patches/ext4-quota-dont-update-cmtime.patch | 92 +++++ .../patches/ext4-quota-first-class.patch | 399 +++++++++++++++++++++ .../ext4-quota-force-block-alloc-quotaoff.patch | 62 ++++ .../patches/ext4-quota-minimal-rhel5.patch | 20 ++ .../series/ldiskfs-2.6-rhel5-ext4.series | 1 + .../kernel_patches/series/ldiskfs-2.6-rhel6.series | 3 + lustre/osd-ldiskfs/Makefile.in | 2 +- lustre/osd-ldiskfs/autoMakefile.am | 2 +- lustre/osd-ldiskfs/osd_handler.c | 226 +++++++++--- lustre/osd-ldiskfs/osd_internal.h | 57 ++- lustre/osd-ldiskfs/osd_oi.c | 3 + lustre/osd-ldiskfs/osd_quota.c | 389 ++++++++++++++++++++ lustre/osd-ldiskfs/osd_quota_fmt.c | 284 +++++++++++++++ lustre/osd-ldiskfs/osd_quota_fmt.h | 112 ++++++ lustre/quota/lquota_lib.c | 1 - 15 files changed, 1586 insertions(+), 67 deletions(-) create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch create mode 100644 lustre/osd-ldiskfs/osd_quota.c create mode 100644 
lustre/osd-ldiskfs/osd_quota_fmt.c create mode 100644 lustre/osd-ldiskfs/osd_quota_fmt.h diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch b/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch new file mode 100644 index 0000000..4c0a7f5 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch @@ -0,0 +1,92 @@ +commit 21f976975cbecbdaf23ceeacc1cab2b1c05a028e +Author: Jan Kara +Date: Mon Apr 4 15:33:39 2011 -0400 + + ext4: remove unnecessary [cm]time update of quota file + + It is not necessary to update [cm]time of quota file on each quota + file write and it wastes journal space and IO throughput with inode + writes. So just remove the updating from ext4_quota_write() and only + update times when quotas are being turned off. Userspace cannot get + anything reliable from quota files while they are used by the kernel + anyway. + + Signed-off-by: Jan Kara + Signed-off-by: "Theodore Ts'o" + +Index: linux-stage/fs/ext4/ext4_jbd2.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4_jbd2.h 2012-06-26 11:26:25.000000000 +0200 ++++ linux-stage/fs/ext4/ext4_jbd2.h 2012-06-26 11:35:31.025105000 +0200 +@@ -88,8 +88,8 @@ + + #ifdef CONFIG_QUOTA + /* Amount of blocks needed for quota update - we know that the structure was +- * allocated so we need to update only inode+data */ +-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) ++ * allocated so we need to update only data block */ ++#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) + /* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ + #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? 
(DQUOT_INIT_ALLOC*\ +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c 2012-06-26 11:35:09.000000000 +0200 ++++ linux-stage/fs/ext4/super.c 2012-06-26 11:37:30.905374000 +0200 +@@ -4582,6 +4582,7 @@ static int ext4_quota_on(struct super_bl + static int ext4_quota_off(struct super_block *sb, int type, int remount) + { + struct quota_info *dqopt = sb_dqopt(sb); ++ int cnt; + + mutex_lock(&dqopt->dqonoff_mutex); + if (!sb_any_quota_loaded(sb)) { +@@ -4598,6 +4599,37 @@ static int ext4_quota_off(struct super_b + up_read(&sb->s_umount); + } + ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ struct inode *inode; ++ handle_t *handle; ++ ++ if (type != -1 && cnt != type) ++ continue; ++ ++ mutex_lock(&dqopt->dqonoff_mutex); ++ inode = dqopt->files[cnt]; ++ if (!sb_has_quota_loaded(sb, cnt) || !inode) { ++ mutex_unlock(&dqopt->dqonoff_mutex); ++ continue; ++ } ++ ++ inode = igrab(inode); ++ mutex_unlock(&dqopt->dqonoff_mutex); ++ ++ if (!inode) ++ continue; ++ ++ /* Update modification times of quota files when userspace can ++ * start looking at them */ ++ handle = ext4_journal_start(inode, 1); ++ if (!IS_ERR(handle)) { ++ inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ ext4_mark_inode_dirty(handle, inode); ++ ext4_journal_stop(handle); ++ } ++ iput(inode); ++ } ++ + return vfs_quota_off(sb, type, remount); + } + +@@ -4696,9 +4728,8 @@ out: + if (inode->i_size < off + len) { + i_size_write(inode, off + len); + EXT4_I(inode)->i_disksize = inode->i_size; ++ ext4_mark_inode_dirty(handle, inode); + } +- inode->i_mtime = inode->i_ctime = CURRENT_TIME; +- ext4_mark_inode_dirty(handle, inode); + mutex_unlock(&inode->i_mutex); + return len; + } diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch b/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch new file mode 100644 index 0000000..5452398 --- /dev/null +++ 
b/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch @@ -0,0 +1,399 @@ +From: Aditya Kali + +This patch is an attempt towards supporting quotas as first class +feature in ext4. It is based on the proposal at: +https://ext4.wiki.kernel.org/index.php/Design_For_1st_Class_Quota_in_Ext4 +This patch introduces a new feature - EXT4_FEATURE_RO_COMPAT_QUOTA which, when +turned on, enables quota accounting at mount time itself. Also, the +quota inodes are stored in two additional superblock fields. +Some changes introduced by this patch that should be pointed out are: +1) Two new ext4-superblock fields - s_usr_quota_inum and s_grp_quota_inum + for storing the quota inodes in use. +2) If the QUOTA feature and corresponding quota inodes are set in superblock, + Quotas are turned on at mount time irrespective of the quota mount options. + Thus the mount options 'quota', 'usrquota' and 'grpquota' are completely + ignored with the new QUOTA feature flag. +3) Default quota inodes are: inode#3 for tracking userquota and inode#4 for + tracking group quota. The superblock fields can be set to use other inodes + as well. +4) mke2fs or tune2fs will initialize these inodes when quota feature is + being set. The default reserved inodes will not be visible to user as + regular files. +5) Once quotas are turned on, they cannot be turned off while the FS is + mounted. This is because we do not want to let the quota get inconsistent. +6) With the QUOTA feature set, since the quota inodes are hidden, some of the + utilities from quota-tools will no longer work correctly. Instead, e2fsprogs + will include support for fixing the quota files. +7) Support is only for the new V2 quota file format.
+ +Signed-off-by: Aditya Kali +--- +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h 2012-06-26 11:26:23.345235745 +0200 ++++ linux-stage/fs/ext4/ext4.h 2012-06-26 11:37:38.250355000 +0200 +@@ -162,6 +162,8 @@ typedef struct ext4_io_end { + */ + #define EXT4_BAD_INO 1 /* Bad blocks inode */ + #define EXT4_ROOT_INO 2 /* Root inode */ ++#define EXT4_USR_QUOTA_INO 3 /* User quota inode */ ++#define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */ + #define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ + #define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ + #define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ +@@ -1016,7 +1018,9 @@ struct ext4_super_block { + __u8 s_last_error_func[32]; /* function where the error happened */ + #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) + __u8 s_mount_opts[64]; +- __le32 s_reserved[112]; /* Padding to the end of the block */ ++ __le32 s_usr_quota_inum; /* inode for tracking user quota */ ++ __le32 s_grp_quota_inum; /* inode for tracking group quota */ ++ __le32 s_reserved[110]; /* Padding to the end of the block */ + }; + + #ifdef __KERNEL__ +@@ -1090,6 +1094,7 @@ struct ext4_sb_info { + #ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ ++ unsigned long s_qf_inums[MAXQUOTAS]; /* Quota file inodes */ + #endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ + struct rb_root system_blks; +@@ -1189,6 +1194,8 @@ static inline struct timespec ext4_curre + static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) + { + return ino == EXT4_ROOT_INO || ++ ino == EXT4_USR_QUOTA_INO || ++ ino == EXT4_GRP_QUOTA_INO || + ino == EXT4_JOURNAL_INO || + ino == EXT4_RESIZE_INO || + (ino >= EXT4_FIRST_INO(sb) && +@@ -1293,6 +1300,7 @@ EXT4_INODE_BIT_FNS(state, state_flags) + 
#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 + #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 + #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 ++#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 + + #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 + #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 +@@ -1325,7 +1333,8 @@ EXT4_INODE_BIT_FNS(state, state_flags) + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ +- EXT4_FEATURE_RO_COMPAT_HUGE_FILE) ++ EXT4_FEATURE_RO_COMPAT_HUGE_FILE| \ ++ EXT4_FEATURE_RO_COMPAT_QUOTA) + + /* + * Default values for user and/or group using reserved blocks +Index: linux-stage/fs/ext4/ext4_jbd2.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4_jbd2.h 2012-06-26 11:35:31.025105000 +0200 ++++ linux-stage/fs/ext4/ext4_jbd2.h 2012-06-26 11:37:38.250631000 +0200 +@@ -89,14 +89,20 @@ + #ifdef CONFIG_QUOTA + /* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only data block */ +-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) ++#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ++ EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ ++ 1 : 0) + /* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +-#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ +- (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) ++#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ++ EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ ++ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ ++ +3+DQUOT_INIT_REWRITE) : 0) + +-#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? 
(DQUOT_DEL_ALLOC*\ +- (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) ++#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ++ EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ ++ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ ++ +3+DQUOT_DEL_REWRITE) : 0) + #else + #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 + #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c 2012-06-26 11:37:30.905374000 +0200 ++++ linux-stage/fs/ext4/super.c 2012-06-26 11:38:30.997488000 +0200 +@@ -86,6 +86,11 @@ wait_queue_head_t aio_wq[WQ_HASH_SZ]; + + static int bigendian_extents; + ++#ifdef CONFIG_QUOTA ++static int ext4_acct_on(struct super_block *sb); ++static int ext4_acct_off(struct super_block *sb); ++#endif ++ + ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg) + { +@@ -670,6 +675,12 @@ static void ext4_put_super(struct super_ + + ext4_unregister_li_request(sb); + ++#ifdef CONFIG_QUOTA ++ /* disable usage tracking which was enabled at mount time */ ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ++ ext4_acct_off(sb); ++#endif ++ + flush_workqueue(sbi->dio_unwritten_wq); + destroy_workqueue(sbi->dio_unwritten_wq); + +@@ -2142,14 +2153,22 @@ static void ext4_orphan_cleanup(struct s + #ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sb->s_flags |= MS_ACTIVE; +- /* Turn on quotas so that they are updated correctly */ +- for (i = 0; i < MAXQUOTAS; i++) { +- if (EXT4_SB(sb)->s_qf_names[i]) { +- int ret = ext4_quota_on_mount(sb, i); +- if (ret < 0) +- ext4_msg(sb, KERN_ERR, +- "Cannot turn on journaled " +- "quota: error %d", ret); ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { ++ int ret; ++ ret = ext4_acct_on(sb); ++ if (ret) ++ ext4_msg(sb, KERN_ERR, "Failed to turn on usage " ++ "tracking for quota: error 
%d", ret); ++ } else { ++ /* Turn on quotas so that they are updated correctly */ ++ for (i = 0; i < MAXQUOTAS; i++) { ++ if (EXT4_SB(sb)->s_qf_names[i]) { ++ int ret = ext4_quota_on_mount(sb, i); ++ if (ret < 0) ++ ext4_msg(sb, KERN_ERR, ++ "Cannot turn on journaled " ++ "quota: error %d", ret); ++ } + } + } + #endif +@@ -2193,10 +2212,14 @@ static void ext4_orphan_cleanup(struct s + ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", + PLURAL(nr_truncates)); + #ifdef CONFIG_QUOTA +- /* Turn quotas off */ +- for (i = 0; i < MAXQUOTAS; i++) { +- if (sb_dqopt(sb)->files[i]) +- vfs_quota_off(sb, i, 0); ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { ++ ext4_acct_off(sb); ++ } else { ++ /* Turn quotas off */ ++ for (i = 0; i < MAXQUOTAS; i++) { ++ if (sb_dqopt(sb)->files[i]) ++ vfs_quota_off(sb, i, 0); ++ } + } + #endif + sb->s_flags = s_flags; /* Restore MS_RDONLY status */ +@@ -3395,6 +3418,15 @@ static int ext4_fill_super(struct super_ + #ifdef CONFIG_QUOTA + sb->s_qcop = &ext4_qctl_operations; + sb->dq_op = &ext4_quota_operations; ++ ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { ++ /* Use new qctl operations with quota on function that does not ++ * require user specified quota file path. */ ++ sb->s_qcop = &ext4_qctl_operations; ++ ++ sbi->s_qf_inums[USRQUOTA] = es->s_usr_quota_inum; ++ sbi->s_qf_inums[GRPQUOTA] = es->s_grp_quota_inum; ++ } + #endif + INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + mutex_init(&sbi->s_orphan_lock); +@@ -3622,8 +3654,31 @@ no_journal: + } else + descr = "out journal"; + +- ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. 
" +- "Opts: %s%s", descr, sbi->s_es->s_mount_opts, ++#ifdef CONFIG_QUOTA ++ /* Enable space tracking during mount, enforcement can be enabled/disable ++ * later with quota_on/off */ ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && ++ !(sb->s_flags & MS_RDONLY)) { ++ ret = ext4_acct_on(sb); ++ if (ret) { ++ ext4_msg(sb, KERN_ERR, "Can't enable usage tracking on " ++ "a filesystem with the QUOTA feature set"); ++ goto failed_mount4; ++ } ++ } ++#else ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && ++ !(sb->s_flags & MS_RDONLY)) ++ ext4_msg(sb, KERN_WARNING, "Mounting a filesystem with the " ++ "QUOTA feature set whereas the kernel does not " ++ "support quota, e2fsck will be required to fix usage " ++ "information"); ++ ++#endif /* CONFIG_QUOTA */ ++ ++ ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. quota=%s. " ++ "Opts: %s%s", descr, sb_any_quota_loaded(sb) ? "on" : "off", ++ sbi->s_es->s_mount_opts, + *sbi->s_es->s_mount_opts ? "; " : ""); + + lock_kernel(); +@@ -3981,6 +4036,12 @@ static int ext4_commit_super(struct supe + &EXT4_SB(sb)->s_freeblocks_counter)); + es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( + &EXT4_SB(sb)->s_freeinodes_counter)); ++#ifdef CONFIG_QUOTA ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { ++ es->s_usr_quota_inum = EXT4_SB(sb)->s_qf_inums[USRQUOTA]; ++ es->s_grp_quota_inum = EXT4_SB(sb)->s_qf_inums[GRPQUOTA]; ++ } ++#endif + sb->s_dirt = 0; + BUFFER_TRACE(sbh, "marking dirty"); + mark_buffer_dirty(sbh); +@@ -4531,6 +4592,22 @@ static int ext4_quota_on(struct super_bl + int err; + struct path path; + ++ /* When QUOTA feature is set, quota on enables enforcement, accounting ++ * being already enabled at mount time */ ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { ++ struct inode *qf_inode; ++ ++ if (!EXT4_SB(sb)->s_qf_inums[type]) ++ return -EINVAL; ++ qf_inode = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[type]); ++ if 
(IS_ERR(qf_inode)) ++ return PTR_ERR(qf_inode); ++ err = vfs_quota_enable(qf_inode, type, QFMT_VFS_V1, ++ DQUOT_LIMITS_ENABLED); ++ iput(qf_inode); ++ return err; ++ } ++ + if (!test_opt(sb, QUOTA)) + return -EINVAL; + /* When remounting, no checks are needed and in fact, name is NULL */ +@@ -4630,9 +4707,114 @@ static int ext4_quota_off(struct super_b + iput(inode); + } + ++ /* When QUOTA feature is set, quota off just disables enforcement but ++ * leaves accounting on */ ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ++ return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); ++ + return vfs_quota_off(sb, type, remount); + } + ++/* ++ * New quota_on function that is used to turn accounting on when QUOTA ++ * feature is set. ++ */ ++static int ext4_acct_on(struct super_block *sb) ++{ ++ struct inode *qf_inode[MAXQUOTAS]; ++ int rc; ++ ++ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || ++ !EXT4_SB(sb)->s_qf_inums[USRQUOTA] || ++ !EXT4_SB(sb)->s_qf_inums[GRPQUOTA]) ++ return -EINVAL; ++ ++ qf_inode[USRQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[USRQUOTA]); ++ if (IS_ERR(qf_inode[USRQUOTA])) { ++ EXT4_SB(sb)->s_qf_inums[USRQUOTA] = 0; ++ return PTR_ERR(qf_inode[USRQUOTA]); ++ } ++ qf_inode[GRPQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[GRPQUOTA]); ++ if (IS_ERR(qf_inode[GRPQUOTA])) { ++ iput(qf_inode[USRQUOTA]); ++ EXT4_SB(sb)->s_qf_inums[GRPQUOTA] = 0; ++ return PTR_ERR(qf_inode[GRPQUOTA]); ++ } ++ ++ /* ++ * When we journal data on quota file, we have to flush journal to see ++ * all updates to the file when we bypass pagecache... ++ */ ++ if (EXT4_SB(sb)->s_journal) { ++ /* ++ * We don't need to lock updates but journal_flush() could ++ * otherwise be livelocked... 
++ */ ++ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); ++ rc = jbd2_journal_flush(EXT4_SB(sb)->s_journal); ++ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); ++ if (rc) { ++ iput(qf_inode[USRQUOTA]); ++ iput(qf_inode[GRPQUOTA]); ++ return rc; ++ } ++ } ++ ++ /* only enable quota accounting by default */ ++ rc = vfs_quota_enable(qf_inode[USRQUOTA], USRQUOTA, QFMT_VFS_V1, ++ DQUOT_USAGE_ENABLED); ++ iput(qf_inode[USRQUOTA]); ++ if (rc) { ++ iput(qf_inode[GRPQUOTA]); ++ return rc; ++ } ++ rc = vfs_quota_enable(qf_inode[GRPQUOTA], GRPQUOTA, QFMT_VFS_V1, ++ DQUOT_USAGE_ENABLED); ++ iput(qf_inode[GRPQUOTA]); ++ return rc; ++} ++ ++/* ++ * New quota_on function that is used to turn off accounting when QUOTA feature ++ * is set. ++ */ ++static int ext4_acct_off(struct super_block *sb) ++{ ++ int type, rc = 0; ++ ++ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ++ return -EINVAL; ++ ++ for (type = 0; type < MAXQUOTAS; type++) { ++ struct inode *inode = sb_dqopt(sb)->files[type]; ++ handle_t *handle; ++ ++ if (!inode) ++ continue; ++ /* Update modification times of quota files when userspace can ++ * start looking at them */ ++ handle = ext4_journal_start(inode, 1); ++ if (IS_ERR(handle)) ++ goto out; ++ ++ inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ ext4_mark_inode_dirty(handle, inode); ++ ext4_journal_stop(handle); ++ } ++ ++out: ++ for (type = 0; type < MAXQUOTAS; type++) { ++ int ret; ++ ret = vfs_quota_disable(sb, type, ++ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); ++ if (!rc && ret) ++ rc = ret; ++ } ++ return rc; ++} ++ ++ ++ + /* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... 
As quota files are never truncated and quota code + * itself serializes the operations (and noone else should touch the files) diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch b/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch new file mode 100644 index 0000000..d72dd05 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch @@ -0,0 +1,62 @@ +commit ca0e05e4b15193aeba72b995e90de990db7f8304 +Author: Dmitry Monakhov +Date: Sun Aug 1 17:48:36 2010 -0400 + + ext4: force block allocation on quota_off + + Perform full sync procedure so that any delayed allocation blocks are + allocated so quota will be consistent. + + Signed-off-by: Dmitry Monakhov + Signed-off-by: "Theodore Ts'o" + +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c 2012-06-26 09:37:06.039508000 +0200 ++++ linux-stage/fs/ext4/super.c 2012-06-26 11:35:09.824099000 +0200 +@@ -1104,6 +1104,7 @@ static int ext4_mark_dquot_dirty(struct + static int ext4_write_info(struct super_block *sb, int type); + static int ext4_quota_on(struct super_block *sb, int type, int format_id, + char *path, int remount); ++static int ext4_quota_off(struct super_block *sb, int type, int remount); + static int ext4_quota_on_mount(struct super_block *sb, int type); + static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off); +@@ -1173,7 +1174,7 @@ static const struct dquot_operations ext + + static const struct quotactl_ops ext4_qctl_operations = { + .quota_on = ext4_quota_on, +- .quota_off = vfs_quota_off, ++ .quota_off = ext4_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, +@@ -4578,6 +4579,28 @@ static int ext4_quota_on(struct super_bl + return err; + } + ++static int ext4_quota_off(struct super_block *sb, int type, int remount) ++{ ++ 
struct quota_info *dqopt = sb_dqopt(sb); ++ ++ mutex_lock(&dqopt->dqonoff_mutex); ++ if (!sb_any_quota_loaded(sb)) { ++ /* nothing to do */ ++ mutex_unlock(&dqopt->dqonoff_mutex); ++ return 0; ++ } ++ mutex_unlock(&dqopt->dqonoff_mutex); ++ ++ /* Force all delayed allocation blocks to be allocated. */ ++ if (test_opt(sb, DELALLOC)) { ++ down_read(&sb->s_umount); ++ sync_filesystem(sb); ++ up_read(&sb->s_umount); ++ } ++ ++ return vfs_quota_off(sb, type, remount); ++} ++ + /* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... As quota files are never truncated and quota code + * itself serializes the operations (and noone else should touch the files) diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch new file mode 100644 index 0000000..1e98c8f --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch @@ -0,0 +1,20 @@ +Index: linux-2.6.18-238.12.1/fs/ext4/ext4.h +=================================================================== +--- linux-2.6.18-238.12.1.orig/fs/ext4/ext4.h 2011-09-21 17:55:44.627741549 +0200 ++++ linux-2.6.18-238.12.1/fs/ext4/ext4.h 2011-09-21 18:05:20.974106450 +0200 +@@ -971,6 +971,7 @@ + #ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ ++ unsigned long s_qf_inums[MAXQUOTAS]; /* Quota file inodes */ + #endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ + struct rb_root system_blks; +@@ -1171,6 +1172,7 @@ + #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 + #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 + #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 ++#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 + + #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 + #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 diff --git 
a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series index 0cc00f9..abf7009 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series @@ -38,3 +38,4 @@ ext4-vmalloc-rhel5.patch ext4-mballoc-group_check-rhel5.patch ext4-journal-callback-rhel5.patch ext4-store-tree-generation-at-find.patch +ext4-quota-minimal-rhel5.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series index 1dcd44e..416916f 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series @@ -35,3 +35,6 @@ ext4-vmalloc-rhel6.patch ext4-journal-callback.patch ext4-store-tree-generation-at-find.patch ext4_pdirop-rhel6.patch +ext4-quota-force-block-alloc-quotaoff.patch +ext4-quota-dont-update-cmtime.patch +ext4-quota-first-class.patch diff --git a/lustre/osd-ldiskfs/Makefile.in b/lustre/osd-ldiskfs/Makefile.in index 179bf2f..fda3f84 100644 --- a/lustre/osd-ldiskfs/Makefile.in +++ b/lustre/osd-ldiskfs/Makefile.in @@ -1,7 +1,7 @@ MODULES := osd_ldiskfs osd_ldiskfs-objs := osd_handler.o osd_oi.o osd_igif.o osd_lproc.o osd_iam.o \ osd_iam_lfix.o osd_iam_lvar.o osd_io.o osd_compat.o \ - osd_scrub.o + osd_scrub.o osd_quota.o osd_quota_fmt.o EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs diff --git a/lustre/osd-ldiskfs/autoMakefile.am b/lustre/osd-ldiskfs/autoMakefile.am index 3294c28..2952a75 100644 --- a/lustre/osd-ldiskfs/autoMakefile.am +++ b/lustre/osd-ldiskfs/autoMakefile.am @@ -40,4 +40,4 @@ endif MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ EXTRA_DIST := $(osd_ldiskfs-objs:%.o=%.c) osd_internal.h osd_oi.h osd_igif.h \ - osd_iam.h osd_scrub.h + osd_iam.h osd_scrub.h osd_quota_fmt.h diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index ed4ce8d..eced8db 100644 --- 
a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -72,6 +72,8 @@ /* llo_* api support */ #include +/* dt_acct_features */ +#include #ifdef HAVE_LDISKFS_PDO int ldiskfs_pdo = 1; @@ -1501,6 +1503,32 @@ static int osd_inode_setattr(const struct lu_env *env, return 0; } +static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr) +{ + if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) || + (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) { + struct iattr iattr; + int rc; + + iattr.ia_valid = 0; + if (attr->la_valid & LA_UID) + iattr.ia_valid |= ATTR_UID; + if (attr->la_valid & LA_GID) + iattr.ia_valid |= ATTR_GID; + iattr.ia_uid = attr->la_uid; + iattr.ia_gid = attr->la_gid; + + rc = ll_vfs_dq_transfer(inode, &iattr); + if (rc) { + CERROR("%s: quota transfer failed: rc = %d. Is quota " + "enforcement enabled on the ldiskfs filesystem?", + inode->i_sb->s_id, rc); + return rc; + } + } + return 0; +} + static int osd_attr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_attr *attr, @@ -1521,28 +1549,34 @@ static int osd_attr_set(const struct lu_env *env, OSD_EXEC_OP(handle, attr_set); inode = obj->oo_inode; + if (LDISKFS_HAS_RO_COMPAT_FEATURE(inode->i_sb, + LDISKFS_FEATURE_RO_COMPAT_QUOTA)) { + rc = osd_quota_transfer(inode, attr); + if (rc) + return rc; + } else { #ifdef HAVE_QUOTA_SUPPORT - if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) || - (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) { - struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt; - struct iattr iattr; - int rc; - - iattr.ia_valid = 0; - if (attr->la_valid & LA_UID) - iattr.ia_valid |= ATTR_UID; - if (attr->la_valid & LA_GID) - iattr.ia_valid |= ATTR_GID; - iattr.ia_uid = attr->la_uid; - iattr.ia_gid = attr->la_gid; - osd_push_ctxt(env, save); - rc = ll_vfs_dq_transfer(inode, &iattr) ? 
-EDQUOT : 0; - osd_pop_ctxt(save); - if (rc != 0) - return rc; - } + if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) || + (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) { + struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt; + struct iattr iattr; + int rc; + + iattr.ia_valid = 0; + if (attr->la_valid & LA_UID) + iattr.ia_valid |= ATTR_UID; + if (attr->la_valid & LA_GID) + iattr.ia_valid |= ATTR_GID; + iattr.ia_uid = attr->la_uid; + iattr.ia_gid = attr->la_gid; + osd_push_ctxt(env, save); + rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0; + osd_pop_ctxt(save); + if (rc != 0) + return rc; + } #endif - + } cfs_spin_lock(&obj->oo_guard); rc = osd_inode_setattr(env, inode, attr); cfs_spin_unlock(&obj->oo_guard); @@ -1552,26 +1586,6 @@ static int osd_attr_set(const struct lu_env *env, return rc; } -/* - * Object creation. - * - * XXX temporary solution. - */ -static int osd_create_pre(struct osd_thread_info *info, struct osd_object *obj, - struct lu_attr *attr, struct thandle *th) -{ - return 0; -} - -static int osd_create_post(struct osd_thread_info *info, struct osd_object *obj, - struct lu_attr *attr, struct thandle *th) -{ - osd_object_init0(obj); - if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW)) - unlock_new_inode(obj->oo_inode); - return 0; -} - struct dentry *osd_child_dentry_get(const struct lu_env *env, struct osd_object *obj, const char *name, const int namelen) @@ -1681,7 +1695,7 @@ static int osd_mk_index(struct osd_thread_info *info, struct osd_object *obj, struct osd_thandle *oth; const struct dt_index_features *feat = dof->u.dof_idx.di_feat; - __u32 mode = (attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX)); + __u32 mode = (attr->la_mode & (S_IFMT | S_IALLUGO | S_ISVTX)); LASSERT(S_ISREG(attr->la_mode)); @@ -1716,7 +1730,7 @@ static int osd_mkreg(struct osd_thread_info *info, struct osd_object *obj, { LASSERT(S_ISREG(attr->la_mode)); return osd_mkfile(info, obj, (attr->la_mode & - (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, 
th); + (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th); } static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj, @@ -1727,7 +1741,7 @@ static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj, { LASSERT(S_ISLNK(attr->la_mode)); return osd_mkfile(info, obj, (attr->la_mode & - (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th); + (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th); } static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, @@ -1736,7 +1750,7 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, struct dt_object_format *dof, struct thandle *th) { - cfs_umode_t mode = attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX); + cfs_umode_t mode = attr->la_mode & (S_IFMT | S_IALLUGO | S_ISVTX); int result; LINVRNT(osd_invariant(obj)); @@ -1747,6 +1761,10 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, result = osd_mkfile(info, obj, mode, hint, th); if (result == 0) { LASSERT(obj->oo_inode != NULL); + /* + * This inode should be marked dirty for i_rdev. Currently + * that is done in the osd_attr_init(). 
+ */ init_special_inode(obj->oo_inode, mode, attr->la_rdev); } LINVRNT(osd_invariant(obj)); @@ -1798,6 +1816,51 @@ static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah, ah->dah_mode = child_mode; } +static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj, + struct lu_attr *attr, struct dt_object_format *dof) +{ + struct inode *inode = obj->oo_inode; + __u64 valid = attr->la_valid; + int result; + + attr->la_valid &= ~(LA_TYPE | LA_MODE); + + if (dof->dof_type != DFT_NODE) + attr->la_valid &= ~LA_RDEV; + if ((valid & LA_ATIME) && (attr->la_atime == LTIME_S(inode->i_atime))) + attr->la_valid &= ~LA_ATIME; + if ((valid & LA_CTIME) && (attr->la_ctime == LTIME_S(inode->i_ctime))) + attr->la_valid &= ~LA_CTIME; + if ((valid & LA_MTIME) && (attr->la_mtime == LTIME_S(inode->i_mtime))) + attr->la_valid &= ~LA_MTIME; + + if (LDISKFS_HAS_RO_COMPAT_FEATURE(inode->i_sb, + LDISKFS_FEATURE_RO_COMPAT_QUOTA)) { + result = osd_quota_transfer(inode, attr); + if (result) + return; + } else { +#ifdef HAVE_QUOTA_SUPPORT + attr->la_valid &= ~(LA_UID | LA_GID); +#endif + } + + if (attr->la_valid != 0) { + result = osd_inode_setattr(info->oti_env, inode, attr); + /* + * The osd_inode_setattr() should always succeed here. The + * only error that could be returned is EDQUOT when we are + * trying to change the UID or GID of the inode. However, this + * should not happen since quota enforcement is no longer + * enabled on ldiskfs (lquota takes care of it). 
+ */ + LASSERTF(result == 0, "%d", result); + inode->i_sb->s_op->dirty_inode(inode); + } + + attr->la_valid = valid; +} + /** * Helper function for osd_object_create() * @@ -1809,16 +1872,26 @@ static int __osd_object_create(struct osd_thread_info *info, struct dt_object_format *dof, struct thandle *th) { + int result; + __u32 umask; - int result; + /* we drop umask so that permissions we pass are not affected */ + umask = current->fs->umask; + current->fs->umask = 0; - result = osd_create_pre(info, obj, attr, th); + result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof, + th); if (result == 0) { - result = osd_create_type_f(dof->dof_type)(info, obj, - attr, hint, dof, th); - if (result == 0) - result = osd_create_post(info, obj, attr, th); + osd_attr_init(info, obj, attr, dof); + osd_object_init0(obj); + /* bz 24037 */ + if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW)) + unlock_new_inode(obj->oo_inode); } + + /* restore previous umask value */ + current->fs->umask = umask; + return result; } @@ -1900,6 +1973,11 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, LASSERT(osd_write_locked(env, obj)); LASSERT(th != NULL); + if (unlikely(fid_is_acct(fid))) + /* Quota files can't be created from the kernel any more, + * 'tune2fs -O quota' will take care of creating them */ + RETURN(-EPERM); + OSD_EXEC_OP(th, create); result = __osd_object_create(info, obj, attr, hint, dof, th); @@ -1958,6 +2036,9 @@ static int osd_object_destroy(const struct lu_env *env, LASSERT(inode); LASSERT(!lu_object_is_dying(dt->do_lu.lo_header)); + if (unlikely(fid_is_acct(fid))) + RETURN(-EPERM); + /* Parallel control for OI scrub. For most of cases, there is no * lock contention. So it will not affect unlink performance. 
*/ cfs_mutex_lock(&inode->i_mutex); @@ -2107,6 +2188,11 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, LASSERT(osd_write_locked(env, obj)); LASSERT(th != NULL); + if (unlikely(fid_is_acct(fid))) + /* Quota files can't be created from the kernel any more, + * 'tune2fs -O quota' will take care of creating them */ + RETURN(-EPERM); + OSD_EXEC_OP(th, create); result = __osd_object_create(info, obj, attr, hint, dof, th); @@ -2601,10 +2687,10 @@ static int osd_iam_container_init(const struct lu_env *env, static int osd_index_try(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat) { - int result; - int ea_dir = 0; - struct osd_object *obj = osd_dt_obj(dt); - struct osd_device *osd = osd_obj2dev(obj); + int result; + int skip_iam = 0; + struct osd_object *obj = osd_dt_obj(dt); + struct osd_device *osd = osd_obj2dev(obj); LINVRNT(osd_invariant(obj)); LASSERT(dt_object_exists(dt)); @@ -2618,10 +2704,14 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, result = 0; else result = -ENOTDIR; - ea_dir = 1; + skip_iam = 1; } else if (unlikely(feat == &dt_otable_features)) { dt->do_index_ops = &osd_otable_ops; return 0; + } else if (feat == &dt_acct_features) { + dt->do_index_ops = &osd_acct_index_ops; + result = 0; + skip_iam = 1; } else if (!osd_has_index(obj)) { struct osd_directory *dir; @@ -2657,7 +2747,7 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, result = 0; } - if (result == 0 && ea_dir == 0) { + if (result == 0 && skip_iam == 0) { if (!osd_iam_index_probe(env, obj, feat)) result = -ENOTDIR; } @@ -4179,7 +4269,10 @@ struct lu_context_key osd_key = { static int osd_device_init(const struct lu_env *env, struct lu_device *d, const char *name, struct lu_device *next) { - return osd_procfs_init(osd_dev(d), name); + struct osd_device *osd = osd_dev(d); + + snprintf(osd->od_svname, sizeof(osd->od_svname), "%s", name); + return osd_procfs_init(osd, name); } static int
osd_shutdown(const struct lu_env *env, struct osd_device *o) @@ -4190,7 +4283,13 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o) if (o->od_fsops) { fsfilt_put_ops(o->od_fsops); - o->od_fsops = NULL; + o->od_fsops = NULL; + } + + /* shutdown quota slave instance associated with the device */ + if (o->od_quota_slave != NULL) { + qsd_fini(env, o->od_quota_slave); + o->od_quota_slave = NULL; } RETURN(0); @@ -4358,10 +4457,19 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, if (result < 0) RETURN(result); + /* 2. setup quota slave instance */ + osd->od_quota_slave = qsd_init(env, osd->od_svname, &osd->od_dt_dev, + osd->od_proc_entry); + if (IS_ERR(osd->od_quota_slave)) { + result = PTR_ERR(osd->od_quota_slave); + osd->od_quota_slave = NULL; + RETURN(result); + } + if (!lu_device_is_md(pdev)) RETURN(0); - /* 2. setup local objects */ + /* 3. setup local objects */ result = llo_local_objects_setup(env, lu2md_dev(pdev), lu2dt_dev(dev)); RETURN(result); } diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 7e573d1..b757b53 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -72,10 +72,12 @@ #include #include #include +#include #include "osd_oi.h" #include "osd_iam.h" #include "osd_scrub.h" +#include "osd_quota_fmt.h" struct inode; @@ -301,6 +303,12 @@ struct osd_device { cfs_mutex_t od_otable_mutex; struct osd_otable_it *od_otable_it; struct osd_scrub od_scrub; + + /* service name associated with the osd device */ + char od_svname[MAX_OBD_NAME]; + + /* quota slave instance */ + struct qsd_instance *od_quota_slave; }; #define OSD_TRACK_DECLARES @@ -465,6 +473,19 @@ struct osd_it_iam { struct iam_iterator oi_it; }; +/** + * Iterator's in-memory data structure for quota file. 
+ */ +struct osd_it_quota { + struct osd_object *oiq_obj; + /** tree blocks path to where the entry is stored */ + uint oiq_blk[LUSTRE_DQTREEDEPTH]; + /** on-disk offset for current key where quota record can be found */ + loff_t oiq_offset; + /** identifier for current quota record */ + __u64 oiq_id; +}; + #define MAX_BLOCKS_PER_PAGE (CFS_PAGE_SIZE / 512) struct osd_iobuf { @@ -528,11 +549,13 @@ struct osd_thread_info { /** osd iterator context used for iterator session */ - union { - struct osd_it_iam oti_it; - /** ldiskfs iterator data structure, see osd_it_ea_{init, fini} */ - struct osd_it_ea oti_it_ea; - }; + union { + struct osd_it_iam oti_it; + /* ldiskfs iterator data structure, + * see osd_it_ea_{init, fini} */ + struct osd_it_ea oti_it_ea; + struct osd_it_quota oti_it_quota; + }; /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */ void *oti_it_ea_buf; @@ -573,6 +596,12 @@ struct osd_thread_info { #define OSD_FID_REC_SZ 32 char oti_ldp[OSD_FID_REC_SZ]; char oti_ldp2[OSD_FID_REC_SZ]; + + /* used by quota code */ + union { + struct if_dqblk oti_dqblk; + struct if_dqinfo oti_dqinfo; + }; }; extern int ldiskfs_pdo; @@ -629,6 +658,17 @@ int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, struct osd_inode_id *id); +/* osd_quota_fmt.c */ +int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj, + int type, uint blk, int depth, uint index, + struct osd_it_quota *it); +int walk_block_dqentry(const struct lu_env *env, struct osd_object *obj, + int type, uint blk, uint index, + struct osd_it_quota *it); +loff_t find_tree_dqentry(const struct lu_env *env, + struct osd_object *obj, int type, + qid_t dqid, uint blk, int depth, + struct osd_it_quota *it); /* * Invariants, assertions. 
*/ @@ -819,5 +859,12 @@ int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack) return result; } +/** + * Quota/Accounting handling + */ +extern const struct dt_index_operations osd_acct_index_ops; +int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); + #endif /* __KERNEL__ */ #endif /* _OSD_INTERNAL_H */ diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index 544eabc..dfa42ab 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -508,6 +508,9 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, osd_id_gen(id, osd_sb(osd)->s_root->d_inode->i_ino, osd_sb(osd)->s_root->d_inode->i_generation); } else { + if (unlikely(fid_is_acct(fid))) + return osd_acct_obj_lookup(info, osd, fid, id); + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) return osd_compat_spec_lookup(info, osd, fid, id); diff --git a/lustre/osd-ldiskfs/osd_quota.c b/lustre/osd-ldiskfs/osd_quota.c new file mode 100644 index 0000000..e636c6d --- /dev/null +++ b/lustre/osd-ldiskfs/osd_quota.c @@ -0,0 +1,389 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Use is subject to license terms. + * + * Author: Johann Lombardi + * Author: Niu Yawei + */ + +#include +#include "osd_internal.h" + +/** + * Helpers function to find out the quota type (USRQUOTA/GRPQUOTA) of a + * given object + */ +static inline int fid2type(const struct lu_fid *fid) +{ + LASSERT(fid_is_acct(fid)); + if (fid_oid(fid) == ACCT_GROUP_OID) + return GRPQUOTA; + return USRQUOTA; +} + +static inline int obj2type(struct dt_object *obj) +{ + return fid2type(lu_object_fid(&obj->do_lu)); +} + +/** + * Space Accounting Management + */ + +/** + * Look up an accounting object based on its fid. + * + * \param info - is the osd thread info passed by the caller + * \param osd - is the osd device + * \param fid - is the fid of the accounting object we want to look up + * \param id - is the osd_inode_id struct to fill with the inode number of + * the quota file if the lookup is successful + */ +int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id) +{ + struct super_block *sb = osd_sb(osd); + + ENTRY; + LASSERT(fid_is_acct(fid)); + + if (!LDISKFS_HAS_RO_COMPAT_FEATURE(sb, + LDISKFS_FEATURE_RO_COMPAT_QUOTA)) + RETURN(-ENOENT); + + id->oii_gen = OSD_OII_NOGEN; + id->oii_ino = LDISKFS_SB(sb)->s_qf_inums[fid2type(fid)]; + if (!ldiskfs_valid_inum(sb, id->oii_ino)) + RETURN(-ENOENT); + RETURN(0); +} + +/** + * Return space usage (#blocks & #inodes) consumed by a given uid or gid. 
+ * + * \param env - is the environment passed by the caller + * \param dtobj - is the accounting object + * \param dtrec - is the record to fill with space usage information + * \param dtkey - is the id of the user or group for which we would + * like to access disk usage. + * \param capa - is the capability, not used. + * + * \retval +ve - success : exact match + * \retval -ve - failure + */ +static int osd_acct_index_lookup(const struct lu_env *env, + struct dt_object *dtobj, + struct dt_rec *dtrec, + const struct dt_key *dtkey, + struct lustre_capa *capa) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct if_dqblk *dqblk = &info->oti_dqblk; + struct super_block *sb = osd_sb(osd_obj2dev(osd_dt_obj(dtobj))); + struct acct_rec *rec = (struct acct_rec *)dtrec; + __u64 id = *((__u64 *)dtkey); + int rc; + + ENTRY; + + memset(dqblk, 0, sizeof(*dqblk)); + rc = sb->s_qcop->get_dqblk(sb, obj2type(dtobj), (qid_t) id, dqblk); + if (rc) + RETURN(rc); + rec->bspace = dqblk->dqb_curspace; + rec->ispace = dqblk->dqb_curinodes; + RETURN(+1); +} + +#define QUOTA_IT_READ_ERROR(it, rc) \ + CERROR("%s: Error while trying to read quota information, " \ + "failed with %d\n", \ + (it)->oiq_obj->oo_dt.do_lu.lo_dev->ld_obd->obd_name, (rc)) + +/** + * Initialize osd Iterator for given osd index object.
+ * + * \param dt - osd index object + * \param attr - not used + * \param capa - BYPASS_CAPA + */ +static struct dt_it *osd_it_acct_init(const struct lu_env *env, + struct dt_object *dt, + __u32 attr, struct lustre_capa *capa) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct osd_it_quota *it; + struct lu_object *lo = &dt->do_lu; + struct osd_object *obj = osd_dt_obj(dt); + + ENTRY; + + LASSERT(lu_object_exists(lo)); + + if (info == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + it = &info->oti_it_quota; + memset(it, 0, sizeof(*it)); + lu_object_get(lo); + it->oiq_obj = obj; + + /* LUSTRE_DQTREEOFF is the initial offset where the tree can be found */ + it->oiq_blk[0] = LUSTRE_DQTREEOFF; + + /* NB: we don't need to store the tree depth since it is always + * equal to LUSTRE_DQTREEDEPTH - 1 (root has depth = 0) for a leaf + * block. */ + RETURN((struct dt_it *)it); +} + +/** + * Free given iterator. + * + * \param di - osd iterator + */ +static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + + ENTRY; + lu_object_put(env, &it->oiq_obj->oo_dt.do_lu); + EXIT; +} + +/** + * Move Iterator to record specified by \a key, if the \a key isn't found, + * move to the first valid record. 
+ * + * \param di - osd iterator + * \param key - uid or gid + * + * \retval +ve - di points to the first valid record + * \retval +1 - di points to exact matched key + * \retval -ve - failure + */ +static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di, + const struct dt_key *key) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + const struct lu_fid *fid = + lu_object_fid(&it->oiq_obj->oo_dt.do_lu); + int type = fid2type(fid); + qid_t dqid = (qid_t)*(const __u64 *)key; + loff_t offset; + int rc; + + ENTRY; + + offset = find_tree_dqentry(env, it->oiq_obj, type, dqid, + LUSTRE_DQTREEOFF, 0, it); + if (offset > 0) { /* Found */ + RETURN(+1); + } else if (offset < 0) { /* Error */ + QUOTA_IT_READ_ERROR(it, (int)offset); + RETURN((int)offset); + } + + /* The @key is not found, move to the first valid entry */ + rc = walk_tree_dqentry(env, it->oiq_obj, type, it->oiq_blk[0], 0, + 0, it); + if (rc == 0) + rc = 1; + else if (rc > 0) + rc = -ENOENT; + + RETURN(rc); +} + +/** + * Release Iterator + * + * \param di - osd iterator + */ +static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di) +{ + return; +} + +/** + * Move on to the next valid entry. + * + * \param di - osd iterator + * + * \retval +ve - iterator reached the end + * \retval 0 - iterator has not reached the end yet + * \retval -ve - unexpected failure + */ +static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + const struct lu_fid *fid = + lu_object_fid(&it->oiq_obj->oo_dt.do_lu); + int type = fid2type(fid); + int depth, rc; + uint index; + + ENTRY; + + /* Let's first check if there are any remaining valid entry in the + * current leaf block. Start with the next entry after the current one.
+ */ + depth = LUSTRE_DQTREEDEPTH - 1; + index = GETIDINDEX(it->oiq_id, depth); + if (++index < LUSTRE_DQSTRINBLK) { + /* Search for the next valid entry from current index */ + rc = walk_block_dqentry(env, it->oiq_obj, type, + it->oiq_blk[depth], index, it); + if (rc < 0) { + QUOTA_IT_READ_ERROR(it, rc); + RETURN(rc); + } else if (rc == 0) { + /* Found on entry, @it is already updated to the + * new position in walk_block_dqentry(). */ + RETURN(0); + } + } + rc = 1; + + /* We have consumed all the entries of the current leaf block, move on + * to the next one. */ + depth--; + + /* We keep searching as long as walk_tree_dqentry() returns +1 + * (= no valid entry found). */ + for (; depth >= 0 && rc > 0; depth--) { + index = GETIDINDEX(it->oiq_id, depth); + if (++index > 0xff) + continue; + rc = walk_tree_dqentry(env, it->oiq_obj, type, + it->oiq_blk[depth], depth, index, it); + } + + if (rc < 0) + QUOTA_IT_READ_ERROR(it, rc); + RETURN(rc); +} + +/** + * Return pointer to the key under iterator. + * + * \param di - osd iterator + */ +static struct dt_key *osd_it_acct_key(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + + ENTRY; + RETURN((struct dt_key *)&it->oiq_id); +} + +/** + * Return size of key under iterator (in bytes) + * + * \param di - osd iterator + */ +static int osd_it_acct_key_size(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + + ENTRY; + RETURN((int)sizeof(it->oiq_id)); +} + +/** + * Return pointer to the record under iterator. 
+ * + * \param di - osd iterator + * \param attr - not used + */ +static int osd_it_acct_rec(const struct lu_env *env, + const struct dt_it *di, + struct dt_rec *dtrec, __u32 attr) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + const struct dt_key *key = osd_it_acct_key(env, di); + int rc; + + ENTRY; + + rc = osd_acct_index_lookup(env, &it->oiq_obj->oo_dt, dtrec, key, + BYPASS_CAPA); + RETURN(rc > 0 ? 0 : rc); +} + +/** + * Returns cookie for current Iterator position. + * + * \param di - osd iterator + */ +static __u64 osd_it_acct_store(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + + ENTRY; + RETURN(it->oiq_id); +} + +/** + * Restore iterator from cookie. if the \a hash isn't found, + * restore the first valid record. + * + * \param di - osd iterator + * \param hash - iterator location cookie + * + * \retval +ve - di points to the first valid record + * \retval +1 - di points to exact matched hash + * \retval -ve - failure + */ +static int osd_it_acct_load(const struct lu_env *env, + const struct dt_it *di, __u64 hash) +{ + ENTRY; + RETURN(osd_it_acct_get(env, (struct dt_it *)di, + (const struct dt_key *)&hash)); +} + +/** + * Index and Iterator operations for accounting objects + */ +const struct dt_index_operations osd_acct_index_ops = { + .dio_lookup = osd_acct_index_lookup, + .dio_it = { + .init = osd_it_acct_init, + .fini = osd_it_acct_fini, + .get = osd_it_acct_get, + .put = osd_it_acct_put, + .next = osd_it_acct_next, + .key = osd_it_acct_key, + .key_size = osd_it_acct_key_size, + .rec = osd_it_acct_rec, + .store = osd_it_acct_store, + .load = osd_it_acct_load + } +}; + diff --git a/lustre/osd-ldiskfs/osd_quota_fmt.c b/lustre/osd-ldiskfs/osd_quota_fmt.c new file mode 100644 index 0000000..4dbfa07 --- /dev/null +++ b/lustre/osd-ldiskfs/osd_quota_fmt.c @@ -0,0 +1,284 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Use is subject to license terms. + * + * Lustre administrative quota format. + * from linux/fs/quota_v2.c + */ + +#include "osd_internal.h" +#include "osd_quota_fmt.h" + +typedef char *dqbuf_t; + +static const union +{ + struct lustre_disk_dqblk_v2 r1; +} emptydquot = { .r1 = { 0 } }; + +static inline dqbuf_t getdqbuf(void) +{ + dqbuf_t buf = cfs_alloc(LUSTRE_DQBLKSIZE, CFS_ALLOC_IO); + if (!buf) + CWARN("Not enough memory for quota buffers.\n"); + return buf; +} + +static inline void freedqbuf(dqbuf_t buf) +{ + cfs_free(buf); +} + +/** + * Read the \a blk into \a buf. + * + * TODO Will support enforcement quota later. 
+ */ +static ssize_t quota_read_blk(const struct lu_env *env, + struct osd_object *obj, + int type, uint blk, dqbuf_t buf) +{ + ssize_t ret; + struct super_block *sb = obj->oo_inode->i_sb; + + ENTRY; + + memset(buf, 0, LUSTRE_DQBLKSIZE); + ret = sb->s_op->quota_read(sb, type, buf, LUSTRE_DQBLKSIZE, + blk << LUSTRE_DQBLKSIZE_BITS); + + /* Reading past EOF just returns a block of zeros */ + if (ret == -EBADR) + ret = 0; + + RETURN(ret); +} + +/** + * Find entry in block by given \a dqid in the leaf block \a blk + * + * \retval +ve, the offset of the entry in file + * \retval 0, entry not found + * \retval -ve, unexpected failure + */ +static loff_t find_block_dqentry(const struct lu_env *env, + struct osd_object *obj, int type, + qid_t dqid, uint blk, + struct osd_it_quota *it) +{ + dqbuf_t buf = getdqbuf(); + loff_t ret; + int i; + struct lustre_disk_dqblk_v2 *ddquot; + int dqblk_sz; + + ENTRY; + + ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf); + dqblk_sz = sizeof(struct lustre_disk_dqblk_v2); + if (!buf) + RETURN(-ENOMEM); + ret = quota_read_blk(env, obj, type, blk, buf); + if (ret < 0) { + CERROR("Can't read quota tree block %u.\n", blk); + GOTO(out_buf, ret); + } + + if (dqid) { + for (i = 0; i < LUSTRE_DQSTRINBLK && + le32_to_cpu(ddquot[i].dqb_id) != dqid; i++) + continue; + } else { /* ID 0 as a bit more complicated searching... 
*/ + for (i = 0; i < LUSTRE_DQSTRINBLK; i++) + if (!le32_to_cpu(ddquot[i].dqb_id) && + memcmp((char *)&emptydquot, (char *)&ddquot[i], + dqblk_sz)) + break; + } + if (i == LUSTRE_DQSTRINBLK) { + CDEBUG(D_QUOTA, "Quota for id %u not found.\n", dqid); + ret = 0; + GOTO(out_buf, ret); + } else { + ret = (blk << LUSTRE_DQBLKSIZE_BITS) + + sizeof(struct lustre_disk_dqdbheader) + i * dqblk_sz; + + if (it) { + it->oiq_blk[LUSTRE_DQTREEDEPTH - 1] = blk; + it->oiq_offset = ret; + it->oiq_id = dqid; + } else { + ret = 0; + } + } +out_buf: + freedqbuf(buf); + RETURN(ret); +} + +/** + * Find entry for given \a dqid in the tree block \a blk + * + * \retval +ve, the offset of the entry in file + * \retval 0, entry not found + * \retval -ve, unexpected failure + */ +loff_t find_tree_dqentry(const struct lu_env *env, + struct osd_object *obj, int type, + qid_t dqid, uint blk, int depth, + struct osd_it_quota *it) +{ + dqbuf_t buf = getdqbuf(); + loff_t ret; + u32 *ref = (u32 *) buf; + + ENTRY; + + if (!buf) + RETURN(-ENOMEM); + ret = quota_read_blk(env, obj, type, blk, buf); + if (ret < 0) { + CERROR("Can't read quota tree block %u.\n", blk); + GOTO(out_buf, ret); + } + ret = 0; + blk = le32_to_cpu(ref[GETIDINDEX(dqid, depth)]); + if (!blk) /* No reference? */ + GOTO(out_buf, ret); + + if (depth < LUSTRE_DQTREEDEPTH - 1) + ret = find_tree_dqentry(env, obj, type, dqid, blk, + depth + 1, it); + else + ret = find_block_dqentry(env, obj, type, dqid, blk, it); + + if (it && ret > 0) /* Entry found */ + it->oiq_blk[depth] = blk; +out_buf: + freedqbuf(buf); + RETURN(ret); +} + +/** + * Search from \a index within the leaf block \a blk, and fill the \a it with + * the first valid entry.
+ * + * \retval +ve, no valid entry found + * \retval 0, entry found + * \retval -ve, unexpected failure + */ +int walk_block_dqentry(const struct lu_env *env, struct osd_object *obj, + int type, uint blk, uint index, + struct osd_it_quota *it) +{ + dqbuf_t buf = getdqbuf(); + loff_t ret = 0; + struct lustre_disk_dqdbheader *dqhead; + int i, dqblk_sz; + struct lustre_disk_dqblk_v2 *ddquot; + + ENTRY; + + dqhead = (struct lustre_disk_dqdbheader *)buf; + dqblk_sz = sizeof(struct lustre_disk_dqblk_v2); + if (!buf) + RETURN(-ENOMEM); + ret = quota_read_blk(env, obj, type, blk, buf); + if (ret < 0) { + CERROR("Can't read quota tree block %u.\n", blk); + GOTO(out_buf, ret); + } + ret = 1; + + if (!le32_to_cpu(dqhead->dqdh_entries)) + GOTO(out_buf, ret); + + ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf); + LASSERT(index < LUSTRE_DQSTRINBLK); + for (i = index; i < LUSTRE_DQSTRINBLK; i++) { + /* skip empty entry */ + if (!memcmp((char *)&emptydquot, + (char *)&ddquot[i], dqblk_sz)) + continue; + + it->oiq_blk[LUSTRE_DQTREEDEPTH - 1] = blk; + it->oiq_id = le32_to_cpu(ddquot[i].dqb_id); + it->oiq_offset = (blk << LUSTRE_DQBLKSIZE_BITS) + + sizeof(struct lustre_disk_dqdbheader) + + i * dqblk_sz; + ret = 0; + break; + } + +out_buf: + freedqbuf(buf); + RETURN(ret); +} + +/** + * Search from \a index within the tree block \a blk, and fill the \a it + * with the first valid entry. 
+ * + * \retval +ve, no valid entry found + * \retval 0, entry found + * \retval -ve, unexpected failure + */ +int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj, + int type, uint blk, int depth, uint index, + struct osd_it_quota *it) +{ + dqbuf_t buf = getdqbuf(); + loff_t ret; + u32 *ref = (u32 *) buf; + + ENTRY; + + if (!buf) + RETURN(-ENOMEM); + ret = quota_read_blk(env, obj, type, blk, buf); + if (ret < 0) { + CERROR("Can't read quota tree block %u.\n", blk); + goto out_buf; + } + ret = 1; + + for (; index <= 0xff && ret > 0; index++) { + blk = le32_to_cpu(ref[index]); + if (!blk) /* No reference */ + continue; + + if (depth < LUSTRE_DQTREEDEPTH - 1) + ret = walk_tree_dqentry(env, obj, type, blk, + depth + 1, 0, it); + else + ret = walk_block_dqentry(env, obj, type, blk, 0, it); + } + + if (ret == 0) /* Entry found */ + it->oiq_blk[depth] = blk; +out_buf: + freedqbuf(buf); + RETURN(ret); +} diff --git a/lustre/osd-ldiskfs/osd_quota_fmt.h b/lustre/osd-ldiskfs/osd_quota_fmt.h new file mode 100644 index 0000000..2857a7d --- /dev/null +++ b/lustre/osd-ldiskfs/osd_quota_fmt.h @@ -0,0 +1,112 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Use is subject to license terms. + * + * Lustre ldiskfs quota format + * from include/linux/quotaio_v2.h + */ +#ifndef _OSD_QUOTA_FMT_H +#define _OSD_QUOTA_FMT_H + +#include +#include + +/* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is a radix tree whose leaves point + * to blocks of these structures. for the version 2. + */ +struct lustre_disk_dqblk_v2 { + __u32 dqb_id; /**< id this quota applies to */ + __u32 padding; + __u64 dqb_ihardlimit; /**< absolute limit on allocated inodes */ + __u64 dqb_isoftlimit; /**< preferred inode limit */ + __u64 dqb_curinodes; /**< current # allocated inodes */ + /**< absolute limit on disk space (in QUOTABLOCK_SIZE) */ + __u64 dqb_bhardlimit; + /**< preferred limit on disk space (in QUOTABLOCK_SIZE) */ + __u64 dqb_bsoftlimit; + __u64 dqb_curspace; /**< current space occupied (in bytes) */ + obd_time dqb_btime; /**< time limit for excessive disk use */ + obd_time dqb_itime; /**< time limit for excessive inode use */ +}; + +/* Number of entries in one blocks(14 entries) */ +#define LUSTRE_DQSTRINBLK \ + ((LUSTRE_DQBLKSIZE - sizeof(struct lustre_disk_dqdbheader)) \ + / sizeof(struct lustre_disk_dqblk_v2)) +#define GETENTRIES(buf) (((char *)buf)+sizeof(struct lustre_disk_dqdbheader)) + +/* + * Here are header structures as written on disk and their in-memory copies + */ +/* First generic header */ +struct lustre_disk_dqheader { + __u32 dqh_magic; /* Magic number identifying file */ + __u32 dqh_version; /* File version */ +}; + +/* Header with type and version specific information */ +struct lustre_disk_dqinfo { + /* Time before block soft limit becomes hard 
limit */ + __u32 dqi_bgrace; + /* Time before inode soft limit becomes hard limit */ + __u32 dqi_igrace; + /* Flags for quotafile (DQF_*) */ + __u32 dqi_flags; + /* Number of blocks in file */ + __u32 dqi_blocks; + /* Number of first free block in the list */ + __u32 dqi_free_blk; + /* Number of block with at least one free entry */ + __u32 dqi_free_entry; +}; + +/* + * Structure of header of block with quota structures. It is padded to + * 16 bytes so there will be space for exactly 14 quota-entries in a block + */ +struct lustre_disk_dqdbheader { + __u32 dqdh_next_free; /* Number of next block with free entry */ + __u32 dqdh_prev_free; /* Number of previous block with free entry */ + __u16 dqdh_entries; /* Number of valid entries in block */ + __u16 dqdh_pad1; + __u32 dqdh_pad2; +}; + +/* Offset of info header in file */ +#define LUSTRE_DQINFOOFF sizeof(struct lustre_disk_dqheader) +#define LUSTRE_DQBLKSIZE_BITS 10 +/* Size of block with quota structures */ +#define LUSTRE_DQBLKSIZE (1 << LUSTRE_DQBLKSIZE_BITS) +/* Offset of tree in file in blocks */ +#define LUSTRE_DQTREEOFF 1 +/* Depth of quota tree */ +#define LUSTRE_DQTREEDEPTH 4 + +#define GETIDINDEX(id, depth) (((id) >> \ + ((LUSTRE_DQTREEDEPTH - (depth) - 1) * 8)) & \ + 0xff) +#endif /* osd_quota_fmt.h */ diff --git a/lustre/quota/lquota_lib.c b/lustre/quota/lquota_lib.c index dfffb86..0f5d984 100644 --- a/lustre/quota/lquota_lib.c +++ b/lustre/quota/lquota_lib.c @@ -1,5 +1,4 @@ /* - * * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- 1.8.3.1