From 57df6353ca590f32a32c0f0dc4a1d10a86a027ec Mon Sep 17 00:00:00 2001
From: Mikhail Pershin <tappro@whamcloud.com>
Date: Tue, 19 Jun 2012 23:49:35 +0400
Subject: [PATCH] LU-1182 ldiskfs-osd: space accounting support

Add space accounting support to ldiskfs OSD.

This patch also sets initial attributes in do_create().
mdd_attr_set_internal() from mdd_object_initialize() is kept until
EDQUOT is returned in lquota itself.
Attributes of new inodes are now initialized in osd_object_create().
All LA_MODE bits are now passed to ldiskfs_create_inode().
(original patch from LiWei, see ORI-46)

Signed-off-by: Johann Lombardi <johann@whamcloud.com>
Change-Id: I77a621c76343c2633810bb3cef9859ee30b7b23a
Reviewed-on: http://review.whamcloud.com/3160
Reviewed-by: Niu Yawei <niu@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
---
 .../patches/ext4-quota-dont-update-cmtime.patch    |  92 +++++
 .../patches/ext4-quota-first-class.patch           | 399 +++++++++++++++++++++
 .../ext4-quota-force-block-alloc-quotaoff.patch    |  62 ++++
 .../patches/ext4-quota-minimal-rhel5.patch         |  20 ++
 .../series/ldiskfs-2.6-rhel5-ext4.series           |   1 +
 .../kernel_patches/series/ldiskfs-2.6-rhel6.series |   3 +
 lustre/osd-ldiskfs/Makefile.in                     |   2 +-
 lustre/osd-ldiskfs/autoMakefile.am                 |   2 +-
 lustre/osd-ldiskfs/osd_handler.c                   | 226 +++++++++---
 lustre/osd-ldiskfs/osd_internal.h                  |  57 ++-
 lustre/osd-ldiskfs/osd_oi.c                        |   3 +
 lustre/osd-ldiskfs/osd_quota.c                     | 389 ++++++++++++++++++++
 lustre/osd-ldiskfs/osd_quota_fmt.c                 | 284 +++++++++++++++
 lustre/osd-ldiskfs/osd_quota_fmt.h                 | 112 ++++++
 lustre/quota/lquota_lib.c                          |   1 -
 15 files changed, 1586 insertions(+), 67 deletions(-)
 create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch
 create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch
 create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch
 create mode 100644 ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch
 create mode 100644 lustre/osd-ldiskfs/osd_quota.c
 create mode 100644 lustre/osd-ldiskfs/osd_quota_fmt.c
 create mode 100644 lustre/osd-ldiskfs/osd_quota_fmt.h

diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch b/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch
new file mode 100644
index 0000000..4c0a7f5
--- /dev/null
+++ b/ldiskfs/kernel_patches/patches/ext4-quota-dont-update-cmtime.patch
@@ -0,0 +1,92 @@
+commit 21f976975cbecbdaf23ceeacc1cab2b1c05a028e
+Author: Jan Kara <jack@suse.cz>
+Date:   Mon Apr 4 15:33:39 2011 -0400
+
+    ext4: remove unnecessary [cm]time update of quota file
+
+    It is not necessary to update [cm]time of quota file on each quota
+    file write and it wastes journal space and IO throughput with inode
+    writes. So just remove the updating from ext4_quota_write() and only
+    update times when quotas are being turned off. Userspace cannot get
+    anything reliable from quota files while they are used by the kernel
+    anyway.
+
+    Signed-off-by: Jan Kara <jack@suse.cz>
+    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h	2012-06-26 11:26:25.000000000 +0200
++++ linux-stage/fs/ext4/ext4_jbd2.h	2012-06-26 11:35:31.025105000 +0200
+@@ -88,8 +88,8 @@
+ 
+ #ifdef CONFIG_QUOTA
+ /* Amount of blocks needed for quota update - we know that the structure was
+- * allocated so we need to update only inode+data */
+-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
++ * allocated so we need to update only data block */
++#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
+ /* Amount of blocks needed for quota insert/delete - we do some block writes
+  * but inode, sb and group updates are done only once */
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c	2012-06-26 11:35:09.000000000 +0200
++++ linux-stage/fs/ext4/super.c	2012-06-26 11:37:30.905374000 +0200
+@@ -4582,6 +4582,7 @@ static int ext4_quota_on(struct super_bl
+ static int ext4_quota_off(struct super_block *sb, int type, int remount)
+ {
+ 	struct quota_info *dqopt = sb_dqopt(sb);
++	int                cnt;
+ 
+ 	mutex_lock(&dqopt->dqonoff_mutex);
+ 	if (!sb_any_quota_loaded(sb)) {
+@@ -4598,6 +4599,37 @@ static int ext4_quota_off(struct super_b
+ 		up_read(&sb->s_umount);
+ 	}
+ 
++	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
++		struct inode      *inode;
++		handle_t          *handle;
++
++		if (type != -1 && cnt != type)
++			continue;
++
++		mutex_lock(&dqopt->dqonoff_mutex);
++		inode = dqopt->files[cnt];
++		if (!sb_has_quota_loaded(sb, cnt) || !inode) {
++			mutex_unlock(&dqopt->dqonoff_mutex);
++			continue;
++		}
++
++		inode = igrab(inode);
++		mutex_unlock(&dqopt->dqonoff_mutex);
++
++		if (!inode)
++			continue;
++
++		/* Update modification times of quota files when userspace can
++		 * start looking at them */
++		handle = ext4_journal_start(inode, 1);
++		if (!IS_ERR(handle)) {
++			inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++			ext4_mark_inode_dirty(handle, inode);
++			ext4_journal_stop(handle);
++		}
++		iput(inode);
++	}
++
+ 	return vfs_quota_off(sb, type, remount);
+ }
+ 
+@@ -4696,9 +4728,8 @@ out:
+ 	if (inode->i_size < off + len) {
+ 		i_size_write(inode, off + len);
+ 		EXT4_I(inode)->i_disksize = inode->i_size;
++		ext4_mark_inode_dirty(handle, inode);
+ 	}
+-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+-	ext4_mark_inode_dirty(handle, inode);
+ 	mutex_unlock(&inode->i_mutex);
+ 	return len;
+ }
diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch b/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch
new file mode 100644
index 0000000..5452398
--- /dev/null
+++ b/ldiskfs/kernel_patches/patches/ext4-quota-first-class.patch
@@ -0,0 +1,399 @@
+From: Aditya Kali <adityakali@google.com>
+
+This patch is an attempt towards supporting quotas as first class
+feature in ext4. It is based on the proposal at:
+https://ext4.wiki.kernel.org/index.php/Design_For_1st_Class_Quota_in_Ext4
+This patch introduces a new feature - EXT4_FEATURE_RO_COMPAT_QUOTA which, when
+turned on, enables quota accounting at mount time itself. Also, the
+quota inodes are stored in two additional superblock fields.
+Some changes introduced by this patch that should be pointed out are:
+1) Two new ext4-superblock fields - s_usr_quota_inum and s_grp_quota_inum
+   for storing the quota inodes in use.
+2) If the QUOTA feature and corresponding quota inodes are set in superblock,
+   Quotas are turned on at mount time irrespective of the quota mount options.
+   Thus the mount options 'quota', 'usrquota' and 'grpquota' are completely
+   ignored with the new QUOTA feature flag.
+3) Default quota inodes are: inode#3 for tracking userquota and inode#4 for
+   tracking group quota. The superblock fields can be set to use other inodes
+   as well.
+4) mke2fs or tune2fs will initialize these inodes when quota feature is
+   being set. The default reserved inodes will not be visible to user as
+   regular files.
+5) Once quotas are turned on, they cannot be turned off while the FS is
+   mounted. This is because we do not want to let the quota get inconsistent.
+6) With the QUOTA feature set, since the quota inodes are hidden, some of the
+   utilities from quota-tools will no longer work correctly. Instead, e2fsprogs
+   will include support for fixing the quota files.
+7) Support is only for the new V2 quota file format.
+
+Signed-off-by: Aditya Kali <adityakali@google.com>
+---
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h	2012-06-26 11:26:23.345235745 +0200
++++ linux-stage/fs/ext4/ext4.h	2012-06-26 11:37:38.250355000 +0200
+@@ -162,6 +162,8 @@ typedef struct ext4_io_end {
+  */
+ #define	EXT4_BAD_INO		 1	/* Bad blocks inode */
+ #define EXT4_ROOT_INO		 2	/* Root inode */
++#define EXT4_USR_QUOTA_INO       3      /* User quota inode */
++#define EXT4_GRP_QUOTA_INO       4      /* Group quota inode */
+ #define EXT4_BOOT_LOADER_INO	 5	/* Boot loader inode */
+ #define EXT4_UNDEL_DIR_INO	 6	/* Undelete directory inode */
+ #define EXT4_RESIZE_INO		 7	/* Reserved group descriptors inode */
+@@ -1016,7 +1018,9 @@ struct ext4_super_block {
+ 	__u8	s_last_error_func[32];	/* function where the error happened */
+ #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
+ 	__u8	s_mount_opts[64];
+-	__le32	s_reserved[112];        /* Padding to the end of the block */
++	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
++	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
++	__le32	s_reserved[110];        /* Padding to the end of the block */
+ };
+ 
+ #ifdef __KERNEL__
+@@ -1090,6 +1094,7 @@ struct ext4_sb_info {
+ #ifdef CONFIG_QUOTA
+ 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
+ 	int s_jquota_fmt;			/* Format of quota to use */
++	unsigned long s_qf_inums[MAXQUOTAS];    /* Quota file inodes */
+ #endif
+ 	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+ 	struct rb_root system_blks;
+@@ -1189,6 +1194,8 @@ static inline struct timespec ext4_curre
+ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
+ {
+ 	return ino == EXT4_ROOT_INO ||
++		ino == EXT4_USR_QUOTA_INO ||
++		ino == EXT4_GRP_QUOTA_INO ||
+ 		ino == EXT4_JOURNAL_INO ||
+ 		ino == EXT4_RESIZE_INO ||
+ 		(ino >= EXT4_FIRST_INO(sb) &&
+@@ -1293,6 +1300,7 @@ EXT4_INODE_BIT_FNS(state, state_flags)
+ #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
+ #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
+ #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
++#define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
+ 
+ #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
+ #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
+@@ -1325,7 +1333,8 @@ EXT4_INODE_BIT_FNS(state, state_flags)
+ 					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
+ 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
+ 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
+-					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
++					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE| \
++					 EXT4_FEATURE_RO_COMPAT_QUOTA)
+ 
+ /*
+  * Default values for user and/or group using reserved blocks
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h	2012-06-26 11:35:31.025105000 +0200
++++ linux-stage/fs/ext4/ext4_jbd2.h	2012-06-26 11:37:38.250631000 +0200
+@@ -89,14 +89,20 @@
+ #ifdef CONFIG_QUOTA
+ /* Amount of blocks needed for quota update - we know that the structure was
+  * allocated so we need to update only data block */
+-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
++#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
++		EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
++		1 : 0)
+ /* Amount of blocks needed for quota insert/delete - we do some block writes
+  * but inode, sb and group updates are done only once */
+-#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+-		(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
++#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
++		EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
++		(DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
++		 +3+DQUOT_INIT_REWRITE) : 0)
+ 
+-#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+-		(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
++#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
++		EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\
++		(DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
++		 +3+DQUOT_DEL_REWRITE) : 0)
+ #else
+ #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c	2012-06-26 11:37:30.905374000 +0200
++++ linux-stage/fs/ext4/super.c	2012-06-26 11:38:30.997488000 +0200
+@@ -86,6 +86,11 @@ wait_queue_head_t aio_wq[WQ_HASH_SZ];
+ 
+ static int bigendian_extents;
+ 
++#ifdef CONFIG_QUOTA
++static int ext4_acct_on(struct super_block *sb);
++static int ext4_acct_off(struct super_block *sb);
++#endif
++
+ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
+ 			       struct ext4_group_desc *bg)
+ {
+@@ -670,6 +675,12 @@ static void ext4_put_super(struct super_
+ 
+ 	ext4_unregister_li_request(sb);
+ 
++#ifdef CONFIG_QUOTA
++	/* disable usage tracking which was enabled at mount time */
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
++		ext4_acct_off(sb);
++#endif
++
+ 	flush_workqueue(sbi->dio_unwritten_wq);
+ 	destroy_workqueue(sbi->dio_unwritten_wq);
+ 
+@@ -2142,14 +2153,22 @@ static void ext4_orphan_cleanup(struct s
+ #ifdef CONFIG_QUOTA
+ 	/* Needed for iput() to work correctly and not trash data */
+ 	sb->s_flags |= MS_ACTIVE;
+-	/* Turn on quotas so that they are updated correctly */
+-	for (i = 0; i < MAXQUOTAS; i++) {
+-		if (EXT4_SB(sb)->s_qf_names[i]) {
+-			int ret = ext4_quota_on_mount(sb, i);
+-			if (ret < 0)
+-				ext4_msg(sb, KERN_ERR,
+-					"Cannot turn on journaled "
+-					"quota: error %d", ret);
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++		int ret;
++		ret = ext4_acct_on(sb);
++		if (ret)
++			ext4_msg(sb, KERN_ERR, "Failed to turn on usage "
++				 "tracking for quota: error %d", ret);
++	} else {
++		/* Turn on quotas so that they are updated correctly */
++		for (i = 0; i < MAXQUOTAS; i++) {
++			if (EXT4_SB(sb)->s_qf_names[i]) {
++				int ret = ext4_quota_on_mount(sb, i);
++				if (ret < 0)
++					ext4_msg(sb, KERN_ERR,
++						"Cannot turn on journaled "
++						"quota: error %d", ret);
++			}
+ 		}
+ 	}
+ #endif
+@@ -2193,10 +2212,14 @@ static void ext4_orphan_cleanup(struct s
+ 		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+ 		       PLURAL(nr_truncates));
+ #ifdef CONFIG_QUOTA
+-	/* Turn quotas off */
+-	for (i = 0; i < MAXQUOTAS; i++) {
+-		if (sb_dqopt(sb)->files[i])
+-			vfs_quota_off(sb, i, 0);
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++		ext4_acct_off(sb);
++	} else {
++		/* Turn quotas off */
++		for (i = 0; i < MAXQUOTAS; i++) {
++			if (sb_dqopt(sb)->files[i])
++				vfs_quota_off(sb, i, 0);
++		}
+ 	}
+ #endif
+ 	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+@@ -3395,6 +3418,15 @@ static int ext4_fill_super(struct super_
+ #ifdef CONFIG_QUOTA
+ 	sb->s_qcop = &ext4_qctl_operations;
+ 	sb->dq_op = &ext4_quota_operations;
++
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++		/* Use new qctl operations with quota on function that does not
++		 * require user specified quota file path. */
++		sb->s_qcop = &ext4_qctl_operations;
++
++		sbi->s_qf_inums[USRQUOTA] = le32_to_cpu(es->s_usr_quota_inum);
++		sbi->s_qf_inums[GRPQUOTA] = le32_to_cpu(es->s_grp_quota_inum);
++	}
+ #endif
+ 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+ 	mutex_init(&sbi->s_orphan_lock);
+@@ -3622,8 +3654,31 @@ no_journal:
+ 	} else
+ 		descr = "out journal";
+ 
+-	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+-		 "Opts: %s%s", descr, sbi->s_es->s_mount_opts,
++#ifdef CONFIG_QUOTA
++	/* Enable space tracking during mount, enforcement can be
++	 * enabled/disabled later with quota_on/off */
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
++	    !(sb->s_flags & MS_RDONLY)) {
++		ret = ext4_acct_on(sb);
++		if (ret) {
++			ext4_msg(sb, KERN_ERR, "Can't enable usage tracking on "
++			         "a filesystem with the QUOTA feature set");
++			goto failed_mount4;
++		}
++	}
++#else
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
++	    !(sb->s_flags & MS_RDONLY))
++		ext4_msg(sb, KERN_WARNING, "Mounting a filesystem with the "
++		         "QUOTA feature set whereas the kernel does not "
++			 "support quota, e2fsck will be required to fix usage "
++			 "information");
++
++#endif  /* CONFIG_QUOTA */
++
++	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. quota=%s. "
++		 "Opts: %s%s", descr, sb_any_quota_loaded(sb) ? "on" : "off",
++		 sbi->s_es->s_mount_opts,
+ 		 *sbi->s_es->s_mount_opts ? "; " : "");
+ 
+ 	lock_kernel();
+@@ -3981,6 +4036,12 @@ static int ext4_commit_super(struct supe
+ 					&EXT4_SB(sb)->s_freeblocks_counter));
+ 	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
+ 					&EXT4_SB(sb)->s_freeinodes_counter));
++#ifdef CONFIG_QUOTA
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++		es->s_usr_quota_inum = cpu_to_le32(EXT4_SB(sb)->s_qf_inums[USRQUOTA]);
++		es->s_grp_quota_inum = cpu_to_le32(EXT4_SB(sb)->s_qf_inums[GRPQUOTA]);
++	}
++#endif
+ 	sb->s_dirt = 0;
+ 	BUFFER_TRACE(sbh, "marking dirty");
+ 	mark_buffer_dirty(sbh);
+@@ -4531,6 +4592,22 @@ static int ext4_quota_on(struct super_bl
+ 	int err;
+ 	struct path path;
+ 
++	/* When QUOTA feature is set, quota on enables enforcement, accounting
++	 * being already enabled at mount time */
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
++		struct inode *qf_inode;
++
++	        if (!EXT4_SB(sb)->s_qf_inums[type])
++			return -EINVAL;
++		qf_inode = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[type]);
++		if (IS_ERR(qf_inode))
++			return PTR_ERR(qf_inode);
++		err = vfs_quota_enable(qf_inode, type, QFMT_VFS_V1,
++		                       DQUOT_LIMITS_ENABLED);
++		iput(qf_inode);
++		return err;
++	}
++
+ 	if (!test_opt(sb, QUOTA))
+ 		return -EINVAL;
+ 	/* When remounting, no checks are needed and in fact, name is NULL */
+@@ -4630,9 +4707,114 @@ static int ext4_quota_off(struct super_b
+ 		iput(inode);
+ 	}
+ 
++	/* When QUOTA feature is set, quota off just disables enforcement but
++	 * leaves accounting on */
++	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
++		return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
++
+ 	return vfs_quota_off(sb, type, remount);
+ }
+ 
++/*
++ * New quota_on function that is used to turn accounting on when QUOTA
++ * feature is set.
++ */
++static int ext4_acct_on(struct super_block *sb)
++{
++	struct inode *qf_inode[MAXQUOTAS];
++	int           rc;
++
++	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
++	    !EXT4_SB(sb)->s_qf_inums[USRQUOTA] ||
++	    !EXT4_SB(sb)->s_qf_inums[GRPQUOTA])
++		return -EINVAL;
++
++	qf_inode[USRQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[USRQUOTA]);
++	if (IS_ERR(qf_inode[USRQUOTA])) {
++		EXT4_SB(sb)->s_qf_inums[USRQUOTA] = 0;
++		return PTR_ERR(qf_inode[USRQUOTA]);
++	}
++	qf_inode[GRPQUOTA] = ext4_iget(sb, EXT4_SB(sb)->s_qf_inums[GRPQUOTA]);
++	if (IS_ERR(qf_inode[GRPQUOTA])) {
++		iput(qf_inode[USRQUOTA]);
++		EXT4_SB(sb)->s_qf_inums[GRPQUOTA] = 0;
++		return PTR_ERR(qf_inode[GRPQUOTA]);
++	}
++
++	/*
++	 * When we journal data on quota file, we have to flush journal to see
++	 * all updates to the file when we bypass pagecache...
++	 */
++	if (EXT4_SB(sb)->s_journal) {
++		/*
++		 * We don't need to lock updates but journal_flush() could
++		 * otherwise be livelocked...
++		 */
++		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
++		rc = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
++		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
++		if (rc) {
++			iput(qf_inode[USRQUOTA]);
++			iput(qf_inode[GRPQUOTA]);
++			return rc;
++		}
++	}
++
++	/* only enable quota accounting by default */
++	rc = vfs_quota_enable(qf_inode[USRQUOTA], USRQUOTA, QFMT_VFS_V1,
++		              DQUOT_USAGE_ENABLED);
++	iput(qf_inode[USRQUOTA]);
++	if (rc) {
++		iput(qf_inode[GRPQUOTA]);
++		return rc;
++	}
++	rc = vfs_quota_enable(qf_inode[GRPQUOTA], GRPQUOTA, QFMT_VFS_V1,
++			      DQUOT_USAGE_ENABLED);
++	iput(qf_inode[GRPQUOTA]);
++	return rc;
++}
++
++/*
+ * New quota_off function that is used to turn off accounting when QUOTA feature
++ * is set.
++ */
++static int ext4_acct_off(struct super_block *sb)
++{
++	int type, rc = 0;
++
++	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
++		return -EINVAL;
++
++	for (type = 0; type < MAXQUOTAS; type++) {
++		struct inode *inode = sb_dqopt(sb)->files[type];
++		handle_t     *handle;
++
++		if (!inode)
++			continue;
++		/* Update modification times of quota files when userspace can
++		 * start looking at them */
++		handle = ext4_journal_start(inode, 1);
++		if (IS_ERR(handle))
++			goto out;
++
++		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++		ext4_mark_inode_dirty(handle, inode);
++		ext4_journal_stop(handle);
++	}
++
++out:
++	for (type = 0; type < MAXQUOTAS; type++) {
++		int ret;
++		ret = vfs_quota_disable(sb, type,
++				    DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
++		if (!rc && ret)
++			rc = ret;
++	}
++	return rc;
++}
++
++
++
+ /* Read data from quotafile - avoid pagecache and such because we cannot afford
+  * acquiring the locks... As quota files are never truncated and quota code
+  * itself serializes the operations (and noone else should touch the files)
diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch b/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch
new file mode 100644
index 0000000..d72dd05
--- /dev/null
+++ b/ldiskfs/kernel_patches/patches/ext4-quota-force-block-alloc-quotaoff.patch
@@ -0,0 +1,62 @@
+commit ca0e05e4b15193aeba72b995e90de990db7f8304
+Author: Dmitry Monakhov <dmonakhov@openvz.org>
+Date:   Sun Aug 1 17:48:36 2010 -0400
+
+    ext4: force block allocation on quota_off
+    
+    Perform full sync procedure so that any delayed allocation blocks are
+    allocated so quota will be consistent.
+    
+    Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c	2012-06-26 09:37:06.039508000 +0200
++++ linux-stage/fs/ext4/super.c	2012-06-26 11:35:09.824099000 +0200
+@@ -1104,6 +1104,7 @@ static int ext4_mark_dquot_dirty(struct
+ static int ext4_write_info(struct super_block *sb, int type);
+ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
+ 				char *path, int remount);
++static int ext4_quota_off(struct super_block *sb, int type, int remount);
+ static int ext4_quota_on_mount(struct super_block *sb, int type);
+ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
+ 			       size_t len, loff_t off);
+@@ -1173,7 +1174,7 @@ static const struct dquot_operations ext
+ 
+ static const struct quotactl_ops ext4_qctl_operations = {
+ 	.quota_on	= ext4_quota_on,
+-	.quota_off	= vfs_quota_off,
++	.quota_off	= ext4_quota_off,
+ 	.quota_sync	= vfs_quota_sync,
+ 	.get_info	= vfs_get_dqinfo,
+ 	.set_info	= vfs_set_dqinfo,
+@@ -4578,6 +4579,28 @@ static int ext4_quota_on(struct super_bl
+ 	return err;
+ }
+ 
++static int ext4_quota_off(struct super_block *sb, int type, int remount)
++{
++	struct quota_info *dqopt = sb_dqopt(sb);
++
++	mutex_lock(&dqopt->dqonoff_mutex);
++	if (!sb_any_quota_loaded(sb)) {
++		/* nothing to do */
++		mutex_unlock(&dqopt->dqonoff_mutex);
++		return 0;
++	}
++	mutex_unlock(&dqopt->dqonoff_mutex);
++
++	/* Force all delayed allocation blocks to be allocated. */
++	if (test_opt(sb, DELALLOC)) {
++		down_read(&sb->s_umount);
++		sync_filesystem(sb);
++		up_read(&sb->s_umount);
++	}
++
++	return vfs_quota_off(sb, type, remount);
++}
++
+ /* Read data from quotafile - avoid pagecache and such because we cannot afford
+  * acquiring the locks... As quota files are never truncated and quota code
+  * itself serializes the operations (and noone else should touch the files)
diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch
new file mode 100644
index 0000000..1e98c8f
--- /dev/null
+++ b/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch
@@ -0,0 +1,20 @@
+Index: linux-2.6.18-238.12.1/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.18-238.12.1.orig/fs/ext4/ext4.h	2011-09-21 17:55:44.627741549 +0200
++++ linux-2.6.18-238.12.1/fs/ext4/ext4.h	2011-09-21 18:05:20.974106450 +0200
+@@ -971,6 +971,7 @@
+ #ifdef CONFIG_QUOTA
+ 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
+ 	int s_jquota_fmt;			/* Format of quota to use */
++	unsigned long s_qf_inums[MAXQUOTAS];    /* Quota file inodes */
+ #endif
+ 	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+ 	struct rb_root system_blks;
+@@ -1171,6 +1172,7 @@
+ #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
+ #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
+ #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
++#define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
+ 
+ #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
+ #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
index 0cc00f9..abf7009 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
@@ -38,3 +38,4 @@ ext4-vmalloc-rhel5.patch
 ext4-mballoc-group_check-rhel5.patch
 ext4-journal-callback-rhel5.patch
 ext4-store-tree-generation-at-find.patch
+ext4-quota-minimal-rhel5.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
index 1dcd44e..416916f 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
@@ -35,3 +35,6 @@ ext4-vmalloc-rhel6.patch
 ext4-journal-callback.patch
 ext4-store-tree-generation-at-find.patch
 ext4_pdirop-rhel6.patch
+ext4-quota-force-block-alloc-quotaoff.patch
+ext4-quota-dont-update-cmtime.patch
+ext4-quota-first-class.patch
diff --git a/lustre/osd-ldiskfs/Makefile.in b/lustre/osd-ldiskfs/Makefile.in
index 179bf2f..fda3f84 100644
--- a/lustre/osd-ldiskfs/Makefile.in
+++ b/lustre/osd-ldiskfs/Makefile.in
@@ -1,7 +1,7 @@
 MODULES := osd_ldiskfs
 osd_ldiskfs-objs := osd_handler.o osd_oi.o osd_igif.o osd_lproc.o osd_iam.o \
 		    osd_iam_lfix.o osd_iam_lvar.o osd_io.o osd_compat.o \
-		    osd_scrub.o
+		    osd_scrub.o osd_quota.o osd_quota_fmt.o
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
 
diff --git a/lustre/osd-ldiskfs/autoMakefile.am b/lustre/osd-ldiskfs/autoMakefile.am
index 3294c28..2952a75 100644
--- a/lustre/osd-ldiskfs/autoMakefile.am
+++ b/lustre/osd-ldiskfs/autoMakefile.am
@@ -40,4 +40,4 @@ endif
 
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
 EXTRA_DIST := $(osd_ldiskfs-objs:%.o=%.c) osd_internal.h osd_oi.h osd_igif.h \
-	      osd_iam.h osd_scrub.h
+	      osd_iam.h osd_scrub.h osd_quota_fmt.h
diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c
index ed4ce8d..eced8db 100644
--- a/lustre/osd-ldiskfs/osd_handler.c
+++ b/lustre/osd-ldiskfs/osd_handler.c
@@ -72,6 +72,8 @@
 
 /* llo_* api support */
 #include <md_object.h>
+/* dt_acct_features */
+#include <lquota.h>
 
 #ifdef HAVE_LDISKFS_PDO
 int ldiskfs_pdo = 1;
@@ -1501,6 +1503,32 @@ static int osd_inode_setattr(const struct lu_env *env,
         return 0;
 }
 
+/* Run a quota transfer when \a attr changes the uid or gid of \a inode. */
+static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr)
+{
+	if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) ||
+	    (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) {
+		struct iattr	iattr;
+		int		rc;
+
+		iattr.ia_valid = 0;
+		if (attr->la_valid & LA_UID)
+			iattr.ia_valid |= ATTR_UID;
+		if (attr->la_valid & LA_GID)
+			iattr.ia_valid |= ATTR_GID;
+		iattr.ia_uid = attr->la_uid;
+		iattr.ia_gid = attr->la_gid;
+		rc = ll_vfs_dq_transfer(inode, &iattr);
+		if (rc) {
+			CERROR("%s: quota transfer failed: rc = %d. Is quota "
+			       "enforcement enabled on the ldiskfs "
+			       "filesystem?\n", inode->i_sb->s_id, rc);
+			return rc;
+		}
+	}
+	return 0;
+}
+
 static int osd_attr_set(const struct lu_env *env,
                         struct dt_object *dt,
                         const struct lu_attr *attr,
@@ -1521,28 +1549,34 @@ static int osd_attr_set(const struct lu_env *env,
         OSD_EXEC_OP(handle, attr_set);
 
         inode = obj->oo_inode;
+	if (LDISKFS_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+					  LDISKFS_FEATURE_RO_COMPAT_QUOTA)) {
+		rc = osd_quota_transfer(inode, attr);
+		if (rc)
+			return rc;
+	} else {
 #ifdef HAVE_QUOTA_SUPPORT
-        if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) ||
-            (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) {
-                struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt;
-                struct iattr iattr;
-                int rc;
-
-                iattr.ia_valid = 0;
-                if (attr->la_valid & LA_UID)
-                        iattr.ia_valid |= ATTR_UID;
-                if (attr->la_valid & LA_GID)
-                        iattr.ia_valid |= ATTR_GID;
-                iattr.ia_uid = attr->la_uid;
-                iattr.ia_gid = attr->la_gid;
-                osd_push_ctxt(env, save);
-                rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
-                osd_pop_ctxt(save);
-                if (rc != 0)
-                        return rc;
-        }
+		if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) ||
+		    (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) {
+			struct osd_ctxt	*save = &osd_oti_get(env)->oti_ctxt;
+			struct		 iattr iattr;
+			int		 rc;
+
+			iattr.ia_valid = 0;
+			if (attr->la_valid & LA_UID)
+				iattr.ia_valid |= ATTR_UID;
+			if (attr->la_valid & LA_GID)
+				iattr.ia_valid |= ATTR_GID;
+			iattr.ia_uid = attr->la_uid;
+			iattr.ia_gid = attr->la_gid;
+			osd_push_ctxt(env, save);
+			rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
+			osd_pop_ctxt(save);
+			if (rc != 0)
+				return rc;
+		}
 #endif
-
+	}
         cfs_spin_lock(&obj->oo_guard);
         rc = osd_inode_setattr(env, inode, attr);
         cfs_spin_unlock(&obj->oo_guard);
@@ -1552,26 +1586,6 @@ static int osd_attr_set(const struct lu_env *env,
         return rc;
 }
 
-/*
- * Object creation.
- *
- * XXX temporary solution.
- */
-static int osd_create_pre(struct osd_thread_info *info, struct osd_object *obj,
-                          struct lu_attr *attr, struct thandle *th)
-{
-        return 0;
-}
-
-static int osd_create_post(struct osd_thread_info *info, struct osd_object *obj,
-                           struct lu_attr *attr, struct thandle *th)
-{
-        osd_object_init0(obj);
-        if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
-                unlock_new_inode(obj->oo_inode);
-        return 0;
-}
-
 struct dentry *osd_child_dentry_get(const struct lu_env *env,
                                     struct osd_object *obj,
                                     const char *name, const int namelen)
@@ -1681,7 +1695,7 @@ static int osd_mk_index(struct osd_thread_info *info, struct osd_object *obj,
         struct osd_thandle *oth;
         const struct dt_index_features *feat = dof->u.dof_idx.di_feat;
 
-        __u32 mode = (attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX));
+        __u32 mode = (attr->la_mode & (S_IFMT | S_IALLUGO | S_ISVTX));
 
         LASSERT(S_ISREG(attr->la_mode));
 
@@ -1716,7 +1730,7 @@ static int osd_mkreg(struct osd_thread_info *info, struct osd_object *obj,
 {
         LASSERT(S_ISREG(attr->la_mode));
         return osd_mkfile(info, obj, (attr->la_mode &
-                               (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th);
+                               (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th);
 }
 
 static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj,
@@ -1727,7 +1741,7 @@ static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj,
 {
         LASSERT(S_ISLNK(attr->la_mode));
         return osd_mkfile(info, obj, (attr->la_mode &
-                              (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th);
+                              (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th);
 }
 
 static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj,
@@ -1736,7 +1750,7 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj,
                      struct dt_object_format *dof,
                      struct thandle *th)
 {
-        cfs_umode_t mode = attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX);
+        cfs_umode_t mode = attr->la_mode & (S_IFMT | S_IALLUGO | S_ISVTX);
         int result;
 
         LINVRNT(osd_invariant(obj));
@@ -1747,6 +1761,10 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj,
         result = osd_mkfile(info, obj, mode, hint, th);
         if (result == 0) {
                 LASSERT(obj->oo_inode != NULL);
+		/*
+		 * This inode should be marked dirty for i_rdev.  Currently
+		 * that is done in the osd_attr_init().
+		 */
                 init_special_inode(obj->oo_inode, mode, attr->la_rdev);
         }
         LINVRNT(osd_invariant(obj));
@@ -1798,6 +1816,51 @@ static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah,
         ah->dah_mode = child_mode;
 }
 
+/* Set the remaining @attr fields on the new inode of @obj.  la_valid is
+ * narrowed while doing so and restored on every exit path. */
+static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj,
+			  struct lu_attr *attr, struct dt_object_format *dof)
+{
+	struct inode   *inode = obj->oo_inode;
+	__u64           valid = attr->la_valid;
+	int             result;
+
+	attr->la_valid &= ~(LA_TYPE | LA_MODE);
+
+        if (dof->dof_type != DFT_NODE)
+                attr->la_valid &= ~LA_RDEV;
+        if ((valid & LA_ATIME) && (attr->la_atime == LTIME_S(inode->i_atime)))
+                attr->la_valid &= ~LA_ATIME;
+        if ((valid & LA_CTIME) && (attr->la_ctime == LTIME_S(inode->i_ctime)))
+                attr->la_valid &= ~LA_CTIME;
+        if ((valid & LA_MTIME) && (attr->la_mtime == LTIME_S(inode->i_mtime)))
+                attr->la_valid &= ~LA_MTIME;
+
+	if (LDISKFS_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+					  LDISKFS_FEATURE_RO_COMPAT_QUOTA)) {
+		result = osd_quota_transfer(inode, attr);
+		if (result)
+			goto out; /* la_valid must still be restored */
+	} else {
+#ifdef HAVE_QUOTA_SUPPORT
+		attr->la_valid &= ~(LA_UID | LA_GID);
+#endif
+	}
+
+        if (attr->la_valid != 0) {
+                result = osd_inode_setattr(info->oti_env, inode, attr);
+                /*
+                 * This should always succeed: the only possible error is
+                 * EDQUOT on a UID/GID change, and quota enforcement is no
+                 * longer enabled on ldiskfs (lquota takes care of it).
+                 */
+                LASSERTF(result == 0, "%d", result);
+                inode->i_sb->s_op->dirty_inode(inode);
+        }
+out:
+        attr->la_valid = valid;
+}
+
 /**
  * Helper function for osd_object_create()
  *
@@ -1809,16 +1872,26 @@ static int __osd_object_create(struct osd_thread_info *info,
                                struct dt_object_format *dof,
                                struct thandle *th)
 {
+	int	result;
+	__u32	umask;
 
-        int result;
+	/* we drop umask so that permissions we pass are not affected */
+	umask = current->fs->umask;
+	current->fs->umask = 0;
 
-        result = osd_create_pre(info, obj, attr, th);
+	result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof,
+						  th);
         if (result == 0) {
-                result = osd_create_type_f(dof->dof_type)(info, obj,
-                                           attr, hint, dof, th);
-                if (result == 0)
-                        result = osd_create_post(info, obj, attr, th);
+		osd_attr_init(info, obj, attr, dof);
+		osd_object_init0(obj);
+		/* bz 24037 */
+		if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
+			unlock_new_inode(obj->oo_inode);
         }
+
+	/* restore previous umask value */
+	current->fs->umask = umask;
+
         return result;
 }
 
@@ -1900,6 +1973,11 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt,
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
+	if (unlikely(fid_is_acct(fid)))
+		/* Quota files can't be created from the kernel any more,
+		 * 'tune2fs -O quota' will take care of creating them */
+		RETURN(-EPERM);
+
         OSD_EXEC_OP(th, create);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
@@ -1958,6 +2036,9 @@ static int osd_object_destroy(const struct lu_env *env,
         LASSERT(inode);
         LASSERT(!lu_object_is_dying(dt->do_lu.lo_header));
 
+	if (unlikely(fid_is_acct(fid)))
+		RETURN(-EPERM);
+
 	/* Parallel control for OI scrub. For most of cases, there is no
 	 * lock contention. So it will not affect unlink performance. */
 	cfs_mutex_lock(&inode->i_mutex);
@@ -2107,6 +2188,11 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
+	if (unlikely(fid_is_acct(fid)))
+		/* Quota files can't be created from the kernel any more,
+		 * 'tune2fs -O quota' will take care of creating them */
+		RETURN(-EPERM);
+
         OSD_EXEC_OP(th, create);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
@@ -2601,10 +2687,10 @@ static int osd_iam_container_init(const struct lu_env *env,
 static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
                          const struct dt_index_features *feat)
 {
-        int result;
-        int ea_dir = 0;
-        struct osd_object *obj = osd_dt_obj(dt);
-        struct osd_device *osd = osd_obj2dev(obj);
+	int			 result;
+	int			 skip_iam = 0;
+	struct osd_object	*obj = osd_dt_obj(dt);
+	struct osd_device	*osd = osd_obj2dev(obj);
 
         LINVRNT(osd_invariant(obj));
         LASSERT(dt_object_exists(dt));
@@ -2618,10 +2704,14 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
                         result = 0;
                 else
                         result = -ENOTDIR;
-                ea_dir = 1;
+		skip_iam = 1;
 	} else if (unlikely(feat == &dt_otable_features)) {
 		dt->do_index_ops = &osd_otable_ops;
 		return 0;
+	} else if (feat == &dt_acct_features) {
+		dt->do_index_ops = &osd_acct_index_ops;
+		result = 0;
+		skip_iam = 1;
         } else if (!osd_has_index(obj)) {
                 struct osd_directory *dir;
 
@@ -2657,7 +2747,7 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
                 result = 0;
         }
 
-        if (result == 0 && ea_dir == 0) {
+	if (result == 0 && skip_iam == 0) {
                 if (!osd_iam_index_probe(env, obj, feat))
                         result = -ENOTDIR;
         }
@@ -4179,7 +4269,10 @@ struct lu_context_key osd_key = {
 static int osd_device_init(const struct lu_env *env, struct lu_device *d,
                            const char *name, struct lu_device *next)
 {
-        return osd_procfs_init(osd_dev(d), name);
+	struct osd_device *osd = osd_dev(d);
+	/* strlcpy(), unlike strncpy(), always NUL-terminates od_svname */
+	strlcpy(osd->od_svname, name, sizeof(osd->od_svname));
+	return osd_procfs_init(osd, name);
 }
 
 static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
@@ -4190,7 +4283,13 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
 
 	if (o->od_fsops) {
 		fsfilt_put_ops(o->od_fsops);
-	o->od_fsops = NULL;
+		o->od_fsops = NULL;
+	}
+
+	/* shutdown quota slave instance associated with the device */
+	if (o->od_quota_slave != NULL) {
+		qsd_fini(env, o->od_quota_slave);
+		o->od_quota_slave = NULL;
 	}
 
 	RETURN(0);
@@ -4358,10 +4457,19 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
         if (result < 0)
                 RETURN(result);
 
+	/* 2. setup quota slave instance */
+	osd->od_quota_slave = qsd_init(env, osd->od_svname, &osd->od_dt_dev,
+				       osd->od_proc_entry);
+	if (IS_ERR(osd->od_quota_slave)) {
+		result = PTR_ERR(osd->od_quota_slave);
+		osd->od_quota_slave = NULL;
+		RETURN(result);
+	}
+
         if (!lu_device_is_md(pdev))
                 RETURN(0);
 
-        /* 2. setup local objects */
+        /* 3. setup local objects */
         result = llo_local_objects_setup(env, lu2md_dev(pdev), lu2dt_dev(dev));
         RETURN(result);
 }
diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h
index 7e573d1..b757b53 100644
--- a/lustre/osd-ldiskfs/osd_internal.h
+++ b/lustre/osd-ldiskfs/osd_internal.h
@@ -72,10 +72,12 @@
 #include <obd_class.h>
 #include <lustre_disk.h>
 #include <dt_object.h>
+#include <lquota.h>
 
 #include "osd_oi.h"
 #include "osd_iam.h"
 #include "osd_scrub.h"
+#include "osd_quota_fmt.h"
 
 struct inode;
 
@@ -301,6 +303,12 @@ struct osd_device {
 	cfs_mutex_t		  od_otable_mutex;
 	struct osd_otable_it	 *od_otable_it;
 	struct osd_scrub	  od_scrub;
+
+	/* service name associated with the osd device */
+	char                      od_svname[MAX_OBD_NAME];
+
+	/* quota slave instance */
+	struct qsd_instance      *od_quota_slave;
 };
 
 #define OSD_TRACK_DECLARES
@@ -465,6 +473,19 @@ struct osd_it_iam {
         struct iam_iterator    oi_it;
 };
 
+/**
+ * Iterator's in-memory data structure for quota file.
+ */
+struct osd_it_quota {
+	struct osd_object	*oiq_obj;
+	/** tree blocks path to where the entry is stored */
+	uint			 oiq_blk[LUSTRE_DQTREEDEPTH];
+	/** on-disk offset for current key where quota record can be found */
+	loff_t			 oiq_offset;
+	/** identifier for current quota record */
+	__u64			 oiq_id;
+};
+
 #define MAX_BLOCKS_PER_PAGE (CFS_PAGE_SIZE / 512)
 
 struct osd_iobuf {
@@ -528,11 +549,13 @@ struct osd_thread_info {
 
         /** osd iterator context used for iterator session */
 
-        union {
-                struct osd_it_iam      oti_it;
-                /** ldiskfs iterator data structure, see osd_it_ea_{init, fini} */
-                struct osd_it_ea       oti_it_ea;
-        };
+	union {
+		struct osd_it_iam	oti_it;
+		/* ldiskfs iterator data structure,
+		 * see osd_it_ea_{init, fini} */
+		struct osd_it_ea	oti_it_ea;
+		struct osd_it_quota	oti_it_quota;
+	};
 
         /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */
         void                  *oti_it_ea_buf;
@@ -573,6 +596,12 @@ struct osd_thread_info {
 #define OSD_FID_REC_SZ 32
         char                   oti_ldp[OSD_FID_REC_SZ];
         char                   oti_ldp2[OSD_FID_REC_SZ];
+
+	/* used by quota code */
+	union {
+		struct if_dqblk		oti_dqblk;
+		struct if_dqinfo	oti_dqinfo;
+	};
 };
 
 extern int ldiskfs_pdo;
@@ -629,6 +658,17 @@ int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic,
 int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
 		   struct osd_inode_id *id);
 
+/* osd_quota_fmt.c */
+int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj,
+                      int type, uint blk, int depth, uint index,
+                      struct osd_it_quota *it);
+int walk_block_dqentry(const struct lu_env *env, struct osd_object *obj,
+                       int type, uint blk, uint index,
+                       struct osd_it_quota *it);
+loff_t find_tree_dqentry(const struct lu_env *env,
+                         struct osd_object *obj, int type,
+                         qid_t dqid, uint blk, int depth,
+                         struct osd_it_quota *it);
 /*
  * Invariants, assertions.
  */
@@ -819,5 +859,12 @@ int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack)
         return result;
 }
 
+/**
+ * Quota/Accounting handling
+ */
+extern const struct dt_index_operations osd_acct_index_ops;
+int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd,
+			const struct lu_fid *fid, struct osd_inode_id *id);
+
 #endif /* __KERNEL__ */
 #endif /* _OSD_INTERNAL_H */
diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c
index 544eabc..dfa42ab 100644
--- a/lustre/osd-ldiskfs/osd_oi.c
+++ b/lustre/osd-ldiskfs/osd_oi.c
@@ -508,6 +508,9 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
 		osd_id_gen(id, osd_sb(osd)->s_root->d_inode->i_ino,
 			   osd_sb(osd)->s_root->d_inode->i_generation);
 	} else {
+		if (unlikely(fid_is_acct(fid)))
+			return osd_acct_obj_lookup(info, osd, fid, id);
+
 		if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
 			return osd_compat_spec_lookup(info, osd, fid, id);
 
diff --git a/lustre/osd-ldiskfs/osd_quota.c b/lustre/osd-ldiskfs/osd_quota.c
new file mode 100644
index 0000000..e636c6d
--- /dev/null
+++ b/lustre/osd-ldiskfs/osd_quota.c
@@ -0,0 +1,389 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Use is subject to license terms.
+ *
+ * Author: Johann Lombardi <johann@whamcloud.com>
+ * Author: Niu    Yawei    <niu@whamcloud.com>
+ */
+
+#include <lquota.h>
+#include "osd_internal.h"
+
+/**
+ * Helper function to find out the quota type (USRQUOTA/GRPQUOTA) of a
+ * given accounting object fid; anything but ACCT_GROUP_OID maps to USRQUOTA
+ */
+static inline int fid2type(const struct lu_fid *fid)
+{
+	LASSERT(fid_is_acct(fid));
+	if (fid_oid(fid) == ACCT_GROUP_OID)
+		return GRPQUOTA;
+	return USRQUOTA;
+}
+
+static inline int obj2type(struct dt_object *obj)
+{
+	return fid2type(lu_object_fid(&obj->do_lu));
+}
+
+/**
+ * Space Accounting Management
+ */
+
+/**
+ * Look up an accounting object based on its fid.
+ *
+ * \param info - is the osd thread info passed by the caller
+ * \param osd  - is the osd device
+ * \param fid  - is the fid of the accounting object we want to look up
+ * \param id   - is the osd_inode_id struct to fill with the inode number of
+ *               the quota file if the lookup is successful
+ */
+int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd,
+			const struct lu_fid *fid, struct osd_inode_id *id)
+{
+	struct super_block *sb = osd_sb(osd);
+
+	ENTRY;
+	LASSERT(fid_is_acct(fid));
+
+	if (!LDISKFS_HAS_RO_COMPAT_FEATURE(sb,
+					   LDISKFS_FEATURE_RO_COMPAT_QUOTA))
+		RETURN(-ENOENT);
+
+	id->oii_gen = OSD_OII_NOGEN;
+	id->oii_ino = LDISKFS_SB(sb)->s_qf_inums[fid2type(fid)];
+	if (!ldiskfs_valid_inum(sb, id->oii_ino))
+		RETURN(-ENOENT);
+	RETURN(0);
+}
+
+/**
+ * Return space usage (space & #inodes) consumed by a given uid or gid.
+ *
+ * \param env   - is the environment passed by the caller
+ * \param dtobj - is the accounting object
+ * \param dtrec - is the record to fill with space usage information
+ * \param dtkey - is the id of the user or group (read as a __u64) for
+ *                which we would like to access disk usage.
+ * \param capa - is the capability, not used.
+ *
+ * \retval +ve - success : exact match
+ * \retval -ve - failure
+ */
+static int osd_acct_index_lookup(const struct lu_env *env,
+				 struct dt_object *dtobj,
+				 struct dt_rec *dtrec,
+				 const struct dt_key *dtkey,
+				 struct lustre_capa *capa)
+{
+	struct osd_thread_info	*info = osd_oti_get(env);
+	struct if_dqblk		*dqblk = &info->oti_dqblk;
+	struct super_block	*sb = osd_sb(osd_obj2dev(osd_dt_obj(dtobj)));
+	struct acct_rec		*rec = (struct acct_rec *)dtrec;
+	__u64			 id = *((const __u64 *)dtkey);
+	int			 rc;
+
+	ENTRY;
+
+	memset(dqblk, 0, sizeof(*dqblk)); /* sized by the object cleared */
+	rc = sb->s_qcop->get_dqblk(sb, obj2type(dtobj), (qid_t) id, dqblk);
+	if (rc)
+		RETURN(rc);
+	rec->bspace = dqblk->dqb_curspace;
+	rec->ispace = dqblk->dqb_curinodes;
+	RETURN(+1);
+}
+
+#define QUOTA_IT_READ_ERROR(it, rc)                                    \
+	CERROR("%s: Error while trying to read quota information, "    \
+	       "failed with %d\n",                                     \
+	       (it)->oiq_obj->oo_dt.do_lu.lo_dev->ld_obd->obd_name, (rc))
+
+/**
+ * Initialize osd Iterator for given osd index object.
+ *
+ * \param  dt    - osd index object
+ * \param  attr  - not used
+ * \param  capa  - BYPASS_CAPA
+ */
+static struct dt_it *osd_it_acct_init(const struct lu_env *env,
+				      struct dt_object *dt,
+				      __u32 attr, struct lustre_capa *capa)
+{
+	struct osd_thread_info	*info = osd_oti_get(env);
+	struct osd_it_quota	*it;
+	struct lu_object	*lo = &dt->do_lu;
+	struct osd_object	*obj = osd_dt_obj(dt);
+
+	ENTRY;
+
+	LASSERT(lu_object_exists(lo));
+
+	if (info == NULL)
+		RETURN(ERR_PTR(-ENOMEM));
+
+	it = &info->oti_it_quota;
+	memset(it, 0, sizeof(*it));
+	lu_object_get(lo);
+	it->oiq_obj = obj;
+
+	/* LUSTRE_DQTREEOFF is the initial offset where the tree can be found */
+	it->oiq_blk[0] = LUSTRE_DQTREEOFF;
+
+	/* NB: we don't need to store the tree depth since it is always
+	 * equal to LUSTRE_DQTREEDEPTH - 1 (root has depth = 0) for a leaf
+	 * block. */
+	RETURN((struct dt_it *)it);
+}
+
+/**
+ * Free given iterator.
+ *
+ * \param  di   - osd iterator
+ */
+static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
+{
+	struct osd_it_quota *it = (struct osd_it_quota *)di;
+
+	ENTRY;
+	lu_object_put(env, &it->oiq_obj->oo_dt.do_lu);
+	EXIT;
+}
+
+/**
+ * Move Iterator to record specified by \a key, if the \a key isn't found,
+ * move to the first valid record.
+ *
+ * \param  di   - osd iterator
+ * \param  key  - uid or gid, passed as a pointer to a __u64
+ *
+ * \retval  +1  - di points to the matched key or, failing that, to the
+ *                first valid record
+ * \retval -ve  - failure (-ENOENT if there is no valid record at all)
+ */
+static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
+			   const struct dt_key *key)
+{
+	struct osd_it_quota	*it = (struct osd_it_quota *)di;
+	const struct lu_fid	*fid =
+				lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
+	int			 type = fid2type(fid);
+	qid_t			 dqid = (qid_t)*(const __u64 *)key;
+	loff_t			 offset;
+	int			 rc;
+
+	ENTRY;
+
+	offset = find_tree_dqentry(env, it->oiq_obj, type, dqid,
+				   LUSTRE_DQTREEOFF, 0, it);
+	if (offset > 0) { /* Found */
+		RETURN(+1);
+	} else if (offset < 0) { /* Error */
+		QUOTA_IT_READ_ERROR(it, (int)offset);
+		RETURN((int)offset);
+	}
+
+	/* The @key is not found, move to the first valid entry */
+	rc = walk_tree_dqentry(env, it->oiq_obj, type, it->oiq_blk[0], 0,
+			       0, it);
+	if (rc == 0)
+		rc = 1;
+	else if (rc > 0)
+		rc = -ENOENT;
+
+	RETURN(rc);
+}
+
+/**
+ * Release Iterator
+ *
+ * \param  di   - osd iterator
+ */
+static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
+{
+	return;
+}
+
+/**
+ * Move on to the next valid entry.
+ *
+ * \param  di   - osd iterator
+ *
+ * \retval +ve  - iterator reached the end
+ * \retval   0  - iterator has not reached the end yet
+ * \retval -ve  - unexpected failure
+ */
+static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
+{
+	struct osd_it_quota	*it = (struct osd_it_quota *)di;
+	const struct lu_fid	*fid =
+				lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
+	int			 type = fid2type(fid);
+	int			 depth, rc;
+	uint			 index;
+
+	ENTRY;
+
+	/* Let's first check if there are any remaining valid entries in the
+	 * current leaf block. Start with the next entry after the current one.
+	 */
+	depth = LUSTRE_DQTREEDEPTH - 1;
+	index = GETIDINDEX(it->oiq_id, depth);
+	if (++index < LUSTRE_DQSTRINBLK) {
+		/* Search for the next valid entry from current index */
+		rc = walk_block_dqentry(env, it->oiq_obj, type,
+					it->oiq_blk[depth], index, it);
+		if (rc < 0) {
+			QUOTA_IT_READ_ERROR(it, rc);
+			RETURN(rc);
+		} else if (rc == 0) {
+			/* Found an entry, @it is already updated to the
+			 * new position in walk_block_dqentry(). */
+			RETURN(0);
+		}
+	}
+	rc = 1;
+
+	/* We have consumed all the entries of the current leaf block, move on
+	 * to the next one. */
+	depth--;
+
+	/* We keep searching, one tree level up at a time, as long as
+	 * walk_tree_dqentry() returns +1 (= no valid entry found). */
+	for (; depth >= 0 && rc > 0; depth--) {
+		index = GETIDINDEX(it->oiq_id, depth);
+		if (++index > 0xff)
+			continue;
+		rc = walk_tree_dqentry(env, it->oiq_obj, type,
+				       it->oiq_blk[depth], depth, index, it);
+	}
+
+	if (rc < 0)
+		QUOTA_IT_READ_ERROR(it, rc);
+	RETURN(rc);
+}
+
+/**
+ * Return pointer to the key under iterator.
+ *
+ * \param  di   - osd iterator
+ */
+static struct dt_key *osd_it_acct_key(const struct lu_env *env,
+				      const struct dt_it *di)
+{
+	struct osd_it_quota *it = (struct osd_it_quota *)di;
+
+	ENTRY;
+	RETURN((struct dt_key *)&it->oiq_id);
+}
+
+/**
+ * Return size of key under iterator (in bytes)
+ *
+ * \param  di   - osd iterator
+ */
+static int osd_it_acct_key_size(const struct lu_env *env,
+				const struct dt_it *di)
+{
+	struct osd_it_quota *it = (struct osd_it_quota *)di;
+
+	ENTRY;
+	RETURN((int)sizeof(it->oiq_id));
+}
+
+/**
+ * Fill \a dtrec with the record under iterator (0 on success, -ve on error).
+ *
+ * \param  di    - osd iterator
+ * \param  attr  - not used
+ */
+static int osd_it_acct_rec(const struct lu_env *env,
+			   const struct dt_it *di,
+			   struct dt_rec *dtrec, __u32 attr)
+{
+	struct osd_it_quota	*it = (struct osd_it_quota *)di;
+	const struct dt_key	*key = osd_it_acct_key(env, di);
+	int			 rc;
+
+	ENTRY;
+
+	rc = osd_acct_index_lookup(env, &it->oiq_obj->oo_dt, dtrec, key,
+				   BYPASS_CAPA);
+	RETURN(rc > 0 ? 0 : rc);
+}
+
+/**
+ * Returns cookie for current Iterator position.
+ *
+ * \param  di    - osd iterator
+ */
+static __u64 osd_it_acct_store(const struct lu_env *env,
+			       const struct dt_it *di)
+{
+	struct osd_it_quota *it = (struct osd_it_quota *)di;
+
+	ENTRY;
+	RETURN(it->oiq_id);
+}
+
+/**
+ * Restore iterator from cookie. if the \a hash isn't found,
+ * restore the first valid record.
+ *
+ * \param  di    - osd iterator
+ * \param  hash  - iterator location cookie
+ *
+ * \retval +ve   - di points to the first valid record
+ * \retval  +1   - di points to exact matched hash
+ * \retval -ve   - failure
+ */
+static int osd_it_acct_load(const struct lu_env *env,
+			    const struct dt_it *di, __u64 hash)
+{
+	ENTRY;
+	RETURN(osd_it_acct_get(env, (struct dt_it *)di,
+			       (const struct dt_key *)&hash));
+}
+
+/**
+ * Index and Iterator operations for accounting objects
+ */
+const struct dt_index_operations osd_acct_index_ops = {
+	.dio_lookup	= osd_acct_index_lookup,
+	.dio_it		= {
+		.init		= osd_it_acct_init,
+		.fini		= osd_it_acct_fini,
+		.get		= osd_it_acct_get,
+		.put		= osd_it_acct_put,
+		.next		= osd_it_acct_next,
+		.key		= osd_it_acct_key,
+		.key_size	= osd_it_acct_key_size,
+		.rec		= osd_it_acct_rec,
+		.store		= osd_it_acct_store,
+		.load		= osd_it_acct_load
+	}
+};
+
diff --git a/lustre/osd-ldiskfs/osd_quota_fmt.c b/lustre/osd-ldiskfs/osd_quota_fmt.c
new file mode 100644
index 0000000..4dbfa07
--- /dev/null
+++ b/lustre/osd-ldiskfs/osd_quota_fmt.c
@@ -0,0 +1,284 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Use is subject to license terms.
+ *
+ * Lustre administrative quota format.
+ * from linux/fs/quota_v2.c
+ */
+
+#include "osd_internal.h"
+#include "osd_quota_fmt.h"
+
+typedef char *dqbuf_t;
+
+static const union
+{
+	struct lustre_disk_dqblk_v2 r1;
+} emptydquot = { .r1 = { 0 } };
+
+static inline dqbuf_t getdqbuf(void)
+{
+	dqbuf_t buf = cfs_alloc(LUSTRE_DQBLKSIZE, CFS_ALLOC_IO);
+	if (!buf)
+		CWARN("Not enough memory for quota buffers.\n");
+	return buf;
+}
+
+static inline void freedqbuf(dqbuf_t buf)
+{
+	cfs_free(buf);
+}
+
+/**
+ * Read block \a blk of the \a type quota file into \a buf (zeroed first).
+ *
+ * TODO Will support enforcement quota later.
+ */
+static ssize_t quota_read_blk(const struct lu_env *env,
+			      struct osd_object *obj,
+			      int type, uint blk, dqbuf_t buf)
+{
+	ssize_t ret;
+	struct super_block *sb = obj->oo_inode->i_sb;
+
+	ENTRY;
+
+	memset(buf, 0, LUSTRE_DQBLKSIZE);
+	ret = sb->s_op->quota_read(sb, type, buf, LUSTRE_DQBLKSIZE,
+				   (loff_t)blk << LUSTRE_DQBLKSIZE_BITS);
+
+	/* Reading past EOF just returns a block of zeros */
+	if (ret == -EBADR)
+		ret = 0;
+
+	RETURN(ret);
+}
+
+/**
+ * Find entry in block by given \a dqid in the leaf block \a blk
+ *
+ * \retval +ve, the offset of the entry in file
+ * \retval   0, entry not found (also returned whenever \a it is NULL)
+ * \retval -ve, unexpected failure
+ */
+static loff_t find_block_dqentry(const struct lu_env *env,
+				 struct osd_object *obj, int type,
+				 qid_t dqid, uint blk,
+				 struct osd_it_quota *it)
+{
+	dqbuf_t				 buf = getdqbuf();
+	loff_t				 ret;
+	int				 i;
+	struct lustre_disk_dqblk_v2	*ddquot;
+	int				 dqblk_sz;
+
+	ENTRY;
+
+	if (!buf)
+		RETURN(-ENOMEM);
+	ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf);
+	dqblk_sz = sizeof(struct lustre_disk_dqblk_v2);
+	ret = quota_read_blk(env, obj, type, blk, buf);
+	if (ret < 0) {
+		CERROR("Can't read quota tree block %u.\n", blk);
+		GOTO(out_buf, ret);
+	}
+
+	if (dqid) {
+		for (i = 0; i < LUSTRE_DQSTRINBLK &&
+			    le32_to_cpu(ddquot[i].dqb_id) != dqid; i++)
+			continue;
+	} else { /* ID 0 needs a bit more complicated searching... */
+		for (i = 0; i < LUSTRE_DQSTRINBLK; i++)
+			if (!le32_to_cpu(ddquot[i].dqb_id) &&
+			    memcmp((char *)&emptydquot, (char *)&ddquot[i],
+				   dqblk_sz))
+				break;
+	}
+	if (i == LUSTRE_DQSTRINBLK) {
+		CDEBUG(D_QUOTA, "Quota for id %u not found.\n", dqid);
+		ret = 0;
+		GOTO(out_buf, ret);
+	} else {
+		ret = ((loff_t)blk << LUSTRE_DQBLKSIZE_BITS) +
+		      sizeof(struct lustre_disk_dqdbheader) + i * dqblk_sz;
+
+		if (it) {
+			it->oiq_blk[LUSTRE_DQTREEDEPTH - 1] = blk;
+			it->oiq_offset = ret;
+			it->oiq_id = dqid;
+		} else {
+			ret = 0;
+		}
+	}
+out_buf:
+	freedqbuf(buf);
+	RETURN(ret);
+}
+
+/**
+ * Find entry for given \a dqid in the tree block \a blk of the \a type
+ * quota file, descending one level per recursive call.
+ *
+ * \retval +ve, the offset of the entry in file
+ * \retval   0, entry not found
+ * \retval -ve, unexpected failure
+ */
+loff_t find_tree_dqentry(const struct lu_env *env,
+			 struct osd_object *obj, int type,
+			 qid_t dqid, uint blk, int depth,
+			 struct osd_it_quota *it)
+{
+	dqbuf_t	 buf = getdqbuf();
+	loff_t	 ret;
+	u32	*ref = (u32 *) buf;
+
+	ENTRY;
+
+	if (!buf)
+		RETURN(-ENOMEM);
+	ret = quota_read_blk(env, obj, type, blk, buf);
+	if (ret < 0) {
+		CERROR("Can't read quota tree block %u.\n", blk);
+		GOTO(out_buf, ret);
+	}
+	ret = 0;
+	blk = le32_to_cpu(ref[GETIDINDEX(dqid, depth)]);
+	if (!blk)               /* No reference? */
+		GOTO(out_buf, ret);
+
+	if (depth < LUSTRE_DQTREEDEPTH - 1)
+		ret = find_tree_dqentry(env, obj, type, dqid, blk,
+					depth + 1, it);
+	else
+		ret = find_block_dqentry(env, obj, type, dqid, blk, it);
+	if (it && ret > 0) /* Entry found */
+		it->oiq_blk[depth] = blk;
+out_buf:
+	freedqbuf(buf);
+	RETURN(ret);
+}
+
+/**
+ * Search from \a index within the leaf block \a blk, and fill the \a it with
+ * the first valid entry.
+ *
+ * \retval +ve, no valid entry found
+ * \retval   0, entry found
+ * \retval -ve, unexpected failure
+ */
+int walk_block_dqentry(const struct lu_env *env, struct osd_object *obj,
+		       int type, uint blk, uint index,
+		       struct osd_it_quota *it)
+{
+	dqbuf_t				 buf = getdqbuf();
+	loff_t				 ret = 0;
+	struct lustre_disk_dqdbheader	*dqhead;
+	int				 i, dqblk_sz;
+	struct lustre_disk_dqblk_v2	*ddquot;
+
+	ENTRY;
+
+	dqhead = (struct lustre_disk_dqdbheader *)buf;
+	dqblk_sz = sizeof(struct lustre_disk_dqblk_v2);
+	if (!buf)
+		RETURN(-ENOMEM);
+	ret = quota_read_blk(env, obj, type, blk, buf);
+	if (ret < 0) {
+		CERROR("Can't read quota tree block %u.\n", blk);
+		GOTO(out_buf, ret);
+	}
+	ret = 1;
+
+	if (!le32_to_cpu(dqhead->dqdh_entries))
+		GOTO(out_buf, ret);
+
+	ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf);
+	LASSERT(index < LUSTRE_DQSTRINBLK);
+	for (i = index; i < LUSTRE_DQSTRINBLK; i++) {
+		/* skip empty entry */
+		if (!memcmp((char *)&emptydquot,
+			    (char *)&ddquot[i], dqblk_sz))
+			continue;
+
+		it->oiq_blk[LUSTRE_DQTREEDEPTH - 1] = blk;
+		it->oiq_id = le32_to_cpu(ddquot[i].dqb_id);
+		it->oiq_offset = ((loff_t)blk << LUSTRE_DQBLKSIZE_BITS) +
+				  sizeof(struct lustre_disk_dqdbheader) +
+				  i * dqblk_sz;
+		ret = 0;
+		break;
+	}
+
+out_buf:
+	freedqbuf(buf);
+	RETURN(ret);
+}
+
+/**
+ * Search from \a index within the tree block \a blk, and fill the \a it
+ * with the first valid entry.
+ *
+ * \retval +ve, no valid entry found
+ * \retval   0, entry found
+ * \retval -ve, unexpected failure
+ */
+int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj,
+		      int type, uint blk, int depth, uint index,
+		      struct osd_it_quota *it)
+{
+	dqbuf_t	 buf = getdqbuf();
+	loff_t	 ret;
+	u32	*ref = (u32 *) buf;
+
+	ENTRY;
+
+	if (!buf)
+		RETURN(-ENOMEM);
+	ret = quota_read_blk(env, obj, type, blk, buf);
+	if (ret < 0) {
+		CERROR("Can't read quota tree block %u.\n", blk);
+		goto out_buf;
+	}
+	ret = 1;
+
+	for (; index <= 0xff && ret > 0; index++) {
+		blk = le32_to_cpu(ref[index]);
+		if (!blk)       /* No reference */
+			continue;
+
+		if (depth < LUSTRE_DQTREEDEPTH - 1)
+			ret = walk_tree_dqentry(env, obj, type, blk,
+						depth + 1, 0, it);
+		else
+			ret = walk_block_dqentry(env, obj, type, blk, 0, it);
+	}
+
+	if (ret == 0) /* Entry found */
+		it->oiq_blk[depth] = blk;
+out_buf:
+	freedqbuf(buf);
+	RETURN(ret);
+}
diff --git a/lustre/osd-ldiskfs/osd_quota_fmt.h b/lustre/osd-ldiskfs/osd_quota_fmt.h
new file mode 100644
index 0000000..2857a7d
--- /dev/null
+++ b/lustre/osd-ldiskfs/osd_quota_fmt.h
@@ -0,0 +1,112 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Use is subject to license terms.
+ *
+ * Lustre ldiskfs quota format
+ * from include/linux/quotaio_v2.h
+ */
+#ifndef _OSD_QUOTA_FMT_H
+#define _OSD_QUOTA_FMT_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * The following structure defines the format of a version 2 entry of the
+ * disk quota file (as it appears on disk). The file is a radix tree whose
+ * leaves point to blocks of these structures.
+ */
+struct lustre_disk_dqblk_v2 {
+	__u32 dqb_id;         /**< id this quota applies to */
+	__u32 padding;        /**< explicit pad before the 64-bit fields */
+	__u64 dqb_ihardlimit; /**< absolute limit on allocated inodes */
+	__u64 dqb_isoftlimit; /**< preferred inode limit */
+	__u64 dqb_curinodes;  /**< current # allocated inodes */
+	/** absolute limit on disk space (in QUOTABLOCK_SIZE) */
+	__u64 dqb_bhardlimit;
+	/** preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__u64 dqb_bsoftlimit;
+	__u64 dqb_curspace;   /**< current space occupied (in bytes) */
+	obd_time dqb_btime;   /**< time limit for excessive disk use */
+	obd_time dqb_itime;   /**< time limit for excessive inode use */
+};
+
+/* Number of entries in one block (14), and pointer to the first entry */
+#define LUSTRE_DQSTRINBLK \
+		((LUSTRE_DQBLKSIZE - sizeof(struct lustre_disk_dqdbheader)) \
+		 / sizeof(struct lustre_disk_dqblk_v2))
+#define GETENTRIES(buf) (((char *)(buf))+sizeof(struct lustre_disk_dqdbheader))
+
+/*
+ * Here are header structures as written on disk and their in-memory copies
+ */
+/* First generic header; LUSTRE_DQINFOOFF places the info header after it */
+struct lustre_disk_dqheader {
+	__u32 dqh_magic; /* Magic number identifying file */
+	__u32 dqh_version; /* File version */
+};
+
+/* Type and version specific header, stored at offset LUSTRE_DQINFOOFF */
+struct lustre_disk_dqinfo {
+	/* Time before block soft limit becomes hard limit */
+	__u32 dqi_bgrace;
+	/* Time before inode soft limit becomes hard limit */
+	__u32 dqi_igrace;
+	/* Flags for quotafile (DQF_*) */
+	__u32 dqi_flags;
+	/* Number of blocks in file */
+	__u32 dqi_blocks;
+	/* Block number of the first free block in the list */
+	__u32 dqi_free_blk;
+	/* Block number of a block with at least one free entry */
+	__u32 dqi_free_entry;
+};
+
+/*
+ *  Structure of header of block with quota structures. It is padded to
+ *  16 bytes, leaving room for LUSTRE_DQSTRINBLK (14) entries in a block
+ */
+struct lustre_disk_dqdbheader {
+	__u32 dqdh_next_free; /* Number of next block with free entry */
+	__u32 dqdh_prev_free; /* Number of previous block with free entry */
+	__u16 dqdh_entries;   /* Number of valid entries in block */
+	__u16 dqdh_pad1;
+	__u32 dqdh_pad2;
+};
+
+/* Offset of info header in file (bytes, right after the generic header) */
+#define LUSTRE_DQINFOOFF	sizeof(struct lustre_disk_dqheader)
+#define LUSTRE_DQBLKSIZE_BITS	10	/* log2 of LUSTRE_DQBLKSIZE */
+/* Size of block with quota structures (1024 bytes) */
+#define LUSTRE_DQBLKSIZE	(1 << LUSTRE_DQBLKSIZE_BITS)
+/* Offset of tree in file in blocks */
+#define LUSTRE_DQTREEOFF	1
+/* Depth of quota tree */
+#define LUSTRE_DQTREEDEPTH	4
+/* 8-bit slice of \a id for tree level \a depth; depth 0 is the top byte */
+#define GETIDINDEX(id, depth)	(((id) >> \
+				((LUSTRE_DQTREEDEPTH - (depth) - 1) * 8)) & \
+				0xff)
+#endif /* _OSD_QUOTA_FMT_H */
diff --git a/lustre/quota/lquota_lib.c b/lustre/quota/lquota_lib.c
index dfffb86..0f5d984 100644
--- a/lustre/quota/lquota_lib.c
+++ b/lustre/quota/lquota_lib.c
@@ -1,5 +1,4 @@
 /*
- *
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-- 
1.8.3.1