From: Aditya Kali This patch is an attempt towards supporting quotas as first class feature in ext4. It is based on the proposal at: https://ext4.wiki.kernel.org/index.php/Design_For_1st_Class_Quota_in_Ext4 This patch introduces a new feature - EXT4_FEATURE_RO_COMPAT_QUOTA which, when turned on, enables quota accounting at mount time iteself. Also, the quota inodes are stored in two additional superblock fields. Some changes introduced by this patch that should be pointed out are: 1) Two new ext4-superblock fields - s_usr_quota_inum and s_grp_quota_inum for storing the quota inodes in use. 2) If the QUOTA feature and corresponding quota inodes are set in superblock, Quotas are turned on at mount time irrespective of the quota mount options. Thus the mount options 'quota', 'usrquota' and 'grpquota' are completely ignored with the new QUOTA feature flag. 3) Default quota inodes are: inode#3 for tracking userquota and inode#4 for tracking group quota. The superblock fields can be set to use other inodes as well. 4) mke2fs or tune2fs will initialize these inodes when quota feature is being set. The default reserved inodes will not be visible to user as regular files. 5) Once quotas are turned on, they cannot be turned off while the FS is mounted. This is because we do not want to let the quota get inconsistent. 6) With the QUOTA feature set, since the quota inodes are hidden, some of the utilities from quota-tools will no longer work correctly. Instead, e2fsprogs will include support for fixing the quota files. 7) Support is only for the new V2 quota file format. Signed-off-by: Aditya Kali --- Index: linux-stage/fs/ext4/ext4.h =================================================================== --- linux-stage.orig/fs/ext4/ext4.h +++ linux-stage/fs/ext4/ext4.h @@ -185,6 +185,8 @@ typedef struct ext4_io_end { */ #define EXT4_BAD_INO 1 /* Bad blocks inode */ #define EXT4_ROOT_INO 2 /* Root inode */ +#define EXT4_USR_QUOTA_INO 3 /* User quota inode */ +#define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */ #define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ #define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ #define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ @@ -1042,7 +1044,9 @@ struct ext4_super_block { __u8 s_last_error_func[32]; /* function where the error happened */ #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) __u8 s_mount_opts[64]; - __le32 s_reserved[112]; /* Padding to the end of the block */ + __le32 s_usr_quota_inum; /* inode for tracking user quota */ + __le32 s_grp_quota_inum; /* inode for tracking group quota */ + __le32 s_reserved[110]; /* Padding to the end of the block */ }; #ifdef __KERNEL__ @@ -1116,6 +1120,7 @@ struct ext4_sb_info { #ifdef CONFIG_QUOTA char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ + unsigned long s_qf_inums[MAXQUOTAS]; /* Quota file inodes */ #endif unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ struct rb_root system_blks; @@ -1216,6 +1221,8 @@ static inline struct timespec ext4_curre static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || + ino == EXT4_USR_QUOTA_INO || + ino == EXT4_GRP_QUOTA_INO || ino == EXT4_JOURNAL_INO || ino == EXT4_RESIZE_INO || (ino >= EXT4_FIRST_INO(sb) && @@ -1320,6 +1327,7 @@ EXT4_INODE_BIT_FNS(state, state_flags) #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 +#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -1352,7 +1360,8 @@ EXT4_INODE_BIT_FNS(state, state_flags) EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ - EXT4_FEATURE_RO_COMPAT_HUGE_FILE) + EXT4_FEATURE_RO_COMPAT_HUGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_QUOTA) /* * Default values for user and/or group using reserved blocks Index: linux-stage/fs/ext4/ext4_jbd2.h =================================================================== --- linux-stage.orig/fs/ext4/ext4_jbd2.h 2012-06-26 11:35:31.025105000 +0200 +++ linux-stage/fs/ext4/ext4_jbd2.h 2012-06-26 11:37:38.250631000 +0200 @@ -89,14 +89,20 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) +#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ + +3+DQUOT_INIT_REWRITE) : 0) -#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) +#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ + +3+DQUOT_DEL_REWRITE) : 0) #else #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 Index: linux-stage/fs/ext4/super.c =================================================================== --- linux-stage.orig/fs/ext4/super.c +++ linux-stage/fs/ext4/super.c @@ -115,6 +115,11 @@ void ext4_kvfree(void *ptr) static int bigendian_extents; +#ifdef CONFIG_QUOTA +static int ext4_acct_on(struct super_block *sb); +static int ext4_acct_off(struct super_block *sb); +#endif + ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { @@ -703,6 +708,12 @@ static void ext4_put_super(struct super_ ext4_unregister_li_request(sb); +#ifdef CONFIG_QUOTA + /* disable usage tracking which was enabled at mount time */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) + ext4_acct_off(sb); +#endif + flush_workqueue(sbi->dio_unwritten_wq); destroy_workqueue(sbi->dio_unwritten_wq); @@ -2162,14 +2173,22 @@ static void ext4_orphan_cleanup(struct s #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < MAXQUOTAS; i++) { - if (EXT4_SB(sb)->s_qf_names[i]) { - int ret = ext4_quota_on_mount(sb, i); - if (ret < 0) - ext4_msg(sb, KERN_ERR, - "Cannot turn on journaled " - "quota: error %d", ret); + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + int ret; + ret = ext4_acct_on(sb); + if (ret) + ext4_msg(sb, KERN_ERR, "Failed to turn on usage " + "tracking for quota: error %d", ret); + } else { + /* Turn on quotas so that they are updated correctly */ + for (i = 0; i < MAXQUOTAS; i++) { + if (EXT4_SB(sb)->s_qf_names[i]) { + int ret = ext4_quota_on_mount(sb, i); + if (ret < 0) + ext4_msg(sb, KERN_ERR, + "Cannot turn on journaled " + "quota: error %d", ret); + } } } #endif @@ -2213,10 +2232,14 @@ static void ext4_orphan_cleanup(struct s ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA - /* Turn quotas off */ - for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) - vfs_quota_off(sb, i, 0); + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + ext4_acct_off(sb); + } else { + /* Turn quotas off */ + for (i = 0; i < MAXQUOTAS; i++) { + if (sb_dqopt(sb)->files[i]) + vfs_quota_off(sb, i, 0); + } } #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -3408,6 +3431,15 @@ static int ext4_fill_super(struct super_ #ifdef CONFIG_QUOTA sb->s_qcop = &ext4_qctl_operations; sb->dq_op = &ext4_quota_operations; + + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + /* Use new qctl operations with quota on function that does not + * require user specified quota file path. */ + sb->s_qcop = &ext4_qctl_operations; + + sbi->s_qf_inums[USRQUOTA] = es->s_usr_quota_inum; + sbi->s_qf_inums[GRPQUOTA] = es->s_grp_quota_inum; + } #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); @@ -3633,13 +3665,40 @@ no_journal: } else descr = "out journal"; - ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %s%s", descr, sbi->s_es->s_mount_opts, +#ifdef CONFIG_QUOTA + /* Enable space tracking during mount, enforcement can be enabled/disable + * later with quota_on/off */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + !(sb->s_flags & MS_RDONLY)) { + ret = ext4_acct_on(sb); + if (ret) { + ext4_msg(sb, KERN_ERR, "Can't enable usage tracking on " + "a filesystem with the QUOTA feature set"); + goto failed_mount8; + } + } +#else + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + !(sb->s_flags & MS_RDONLY)) + ext4_msg(sb, KERN_WARNING, "Mounting a filesystem with the " + "QUOTA feature set whereas the kernel does not " + "support quota, e2fsck will be required to fix usage " + "information"); + +#endif /* CONFIG_QUOTA */ + + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. quota=%s. " + "Opts: %s%s", descr, sb_any_quota_loaded(sb) ? "on" : "off", + sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : ""); lock_kernel(); return 0; +#ifdef CONFIG_QUOTA +failed_mount8: + kobject_del(&sbi->s_kobj); +#endif cantfind_ext4: if (!silent) ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); @@ -3991,6 +4050,12 @@ static int ext4_commit_super(struct supe &EXT4_SB(sb)->s_freeblocks_counter)); es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); +#ifdef CONFIG_QUOTA + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + es->s_usr_quota_inum = EXT4_SB(sb)->s_qf_inums[USRQUOTA]; + es->s_grp_quota_inum = EXT4_SB(sb)->s_qf_inums[GRPQUOTA]; + } +#endif sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); @@ -4552,6 +4617,22 @@ static int ext4_quota_on(struct super_bl int err; struct path path; + /* When QUOTA feature is set, quota on enables enforcement, accounting + * being already enabled at mount time */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + struct inode *qf_inode; + + if (!EXT4_SB(sb)->s_qf_inums[type]) + return -EINVAL; + qf_inode = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[type]); + if (IS_ERR(qf_inode)) + return PTR_ERR(qf_inode); + err = vfs_quota_enable(qf_inode, type, QFMT_VFS_V1, + DQUOT_LIMITS_ENABLED); + iput(qf_inode); + return err; + } + if (!test_opt(sb, QUOTA)) return -EINVAL; /* When remounting, no checks are needed and in fact, name is NULL */ @@ -4651,9 +4732,114 @@ static int ext4_quota_off(struct super_b iput(inode); } + /* When QUOTA feature is set, quota off just disables enforcement but + * leaves accounting on */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) + return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); + return vfs_quota_off(sb, type, remount); } +/* + * New quota_on function that is used to turn accounting on when QUOTA + * feature is set. + */ +static int ext4_acct_on(struct super_block *sb) +{ + struct inode *qf_inode[MAXQUOTAS]; + int rc; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || + !EXT4_SB(sb)->s_qf_inums[USRQUOTA] || + !EXT4_SB(sb)->s_qf_inums[GRPQUOTA]) + return -EINVAL; + + qf_inode[USRQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[USRQUOTA]); + if (IS_ERR(qf_inode[USRQUOTA])) { + EXT4_SB(sb)->s_qf_inums[USRQUOTA] = 0; + return PTR_ERR(qf_inode[USRQUOTA]); + } + qf_inode[GRPQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[GRPQUOTA]); + if (IS_ERR(qf_inode[GRPQUOTA])) { + iput(qf_inode[USRQUOTA]); + EXT4_SB(sb)->s_qf_inums[GRPQUOTA] = 0; + return PTR_ERR(qf_inode[GRPQUOTA]); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (EXT4_SB(sb)->s_journal) { + /* + * We don't need to lock updates but journal_flush() could + * otherwise be livelocked... + */ + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + rc = jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (rc) { + iput(qf_inode[USRQUOTA]); + iput(qf_inode[GRPQUOTA]); + return rc; + } + } + + /* only enable quota accounting by default */ + rc = vfs_quota_enable(qf_inode[USRQUOTA], USRQUOTA, QFMT_VFS_V1, + DQUOT_USAGE_ENABLED); + iput(qf_inode[USRQUOTA]); + if (rc) { + iput(qf_inode[GRPQUOTA]); + return rc; + } + rc = vfs_quota_enable(qf_inode[GRPQUOTA], GRPQUOTA, QFMT_VFS_V1, + DQUOT_USAGE_ENABLED); + iput(qf_inode[GRPQUOTA]); + return rc; +} + +/* + * New quota_on function that is used to turn off accounting when QUOTA feature + * is set. + */ +static int ext4_acct_off(struct super_block *sb) +{ + int type, rc = 0; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) + return -EINVAL; + + for (type = 0; type < MAXQUOTAS; type++) { + struct inode *inode = sb_dqopt(sb)->files[type]; + handle_t *handle; + + if (!inode) + continue; + /* Update modification times of quota files when userspace can + * start looking at them */ + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + goto out; + + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + } + +out: + for (type = 0; type < MAXQUOTAS; type++) { + int ret; + ret = vfs_quota_disable(sb, type, + DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + if (!rc && ret) + rc = ret; + } + return rc; +} + + + /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files)