diff -urp linux-3.0.61-0.orig/fs/quota/dquot.c linux-3.0.61-0/fs/quota/dquot.c
--- linux-3.0.61-0.orig/fs/quota/dquot.c	2013-04-10 15:15:11.000000000 -0400
+++ linux-3.0.61-0/fs/quota/dquot.c	2013-04-24 10:27:22.000000000 -0400
@@ -83,26 +83,21 @@
 /*
  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
  * and quota formats.
- * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
- * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
- * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
- * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects
- * modifications of quota state (on quotaon and quotaoff) and readers who care
- * about latest values take it as well.
+ * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures.
+ * dq_state_lock protects modifications of quota state (on quotaon and quotaoff)
+ * and readers who care about latest values take it as well.
  *
- * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
+ * The spinlock ordering is hence: i_lock > dq_data_lock > dq_list_lock,
  *   dq_list_lock > dq_state_lock
  *
  * Note that some things (eg. sb pointer, type, id) doesn't change during
  * the life of the dquot structure and so needn't to be protected by a lock
  *
- * Any operation working on dquots via inode pointers must hold dqptr_sem. If
- * operation is just reading pointers from inode (or not using them at all) the
- * read lock is enough. If pointers are altered function must hold write lock.
+ * Any operation working on dquots via inode pointers must hold i_lock.
  * Special care needs to be taken about S_NOQUOTA inode flag (marking that
  * inode is a quota file). Functions adding pointers from inode to dquots have
- * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
- * have to do all pointer modifications before dropping dqptr_sem. This makes
+ * to check this flag under i_lock and then (if S_NOQUOTA is not set) they
+ * have to do all pointer modifications before dropping i_lock. This makes
  * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
  * then drops all pointers to dquots from an inode.
  *
@@ -116,15 +111,8 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *   i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock >
- *   dqio_mutex
- * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
- * dqptr_sem. But filesystem has to count with the fact that functions such as
- * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
- * from inside a transaction to keep filesystem consistency after a crash. Also
- * filesystems usually want to do some IO on dquot from ->mark_dirty which is
- * called with dqptr_sem held.
- * i_mutex on quota files is special (it's below dqio_mutex)
+ *   i_mutex > dqonoff_sem > journal_lock > dquot->dq_lock > dqio_mutex
+ * i_mutex on quota files is special (it's below dqio_mutex)
  */
 
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
@@ -955,7 +943,6 @@ static inline int dqput_blocks(struct dq
 /*
  * Remove references to dquots from inode and add dquot to list for freeing
  * if we have the last reference to dquot
- * We can't race with anybody because we hold dqptr_sem for writing...
  */
 static int remove_inode_dquot_ref(struct inode *inode, int type,
                                   struct list_head *tofree_head)
@@ -1016,13 +1003,15 @@ static void remove_dquot_ref(struct supe
                  * We have to scan also I_NEW inodes because they can already
                  * have quota pointer initialized. Luckily, we need to touch
                  * only quota pointers and these have separate locking
-                 * (dqptr_sem).
+                 * (i_lock).
                  */
+                spin_lock(&inode->i_lock);
                 if (!IS_NOQUOTA(inode)) {
                         if (unlikely(inode_get_rsv_space(inode) > 0))
                                 reserved = 1;
                         remove_inode_dquot_ref(inode, type, tofree_head);
                 }
+                spin_unlock(&inode->i_lock);
         }
         spin_unlock(&inode_sb_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1040,9 +1029,7 @@ static void drop_dquot_ref(struct super_
         LIST_HEAD(tofree_head);
 
         if (sb->dq_op) {
-                down_write(&sb_dqopt(sb)->dqptr_sem);
                 remove_dquot_ref(sb, type, &tofree_head);
-                up_write(&sb_dqopt(sb)->dqptr_sem);
                 put_dquot_list(&tofree_head);
         }
 }
@@ -1349,9 +1336,6 @@ static int dquot_active(const struct ino
 /*
  * Initialize quota pointers in inode
  *
- * We do things in a bit complicated way but by that we avoid calling
- * dqget() and thus filesystem callbacks under dqptr_sem.
- *
  * It is better to call this function outside of any transaction as it
  * might need a lot of space in journal for dquot structure allocation.
  */
@@ -1384,7 +1368,7 @@ static void __dquot_initialize(struct in
                 got[cnt] = dqget(sb, id, cnt);
         }
 
-        down_write(&sb_dqopt(sb)->dqptr_sem);
+        spin_lock(&inode->i_lock);
         if (IS_NOQUOTA(inode))
                 goto out_err;
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1404,12 +1388,16 @@ static void __dquot_initialize(struct in
                          * did a write before quota was turned on
                          */
                         rsv = inode_get_rsv_space(inode);
-                        if (unlikely(rsv))
+                        if (unlikely(rsv)) {
+                                spin_lock(&dq_data_lock);
                                 dquot_resv_space(inode->i_dquot[cnt], rsv);
+                                spin_unlock(&dq_data_lock);
+                        }
                 }
         }
 out_err:
-        up_write(&sb_dqopt(sb)->dqptr_sem);
+        spin_unlock(&inode->i_lock);
+
         /* Drop unused references */
         dqput_all(got);
 }
@@ -1428,12 +1416,12 @@ static void __dquot_drop(struct inode *i
         int cnt;
         struct dquot *put[MAXQUOTAS];
 
-        down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_lock(&inode->i_lock);
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                 put[cnt] = inode->i_dquot[cnt];
                 inode->i_dquot[cnt] = NULL;
         }
-        up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_unlock(&inode->i_lock);
         dqput_all(put);
 }
@@ -1473,27 +1461,42 @@ static qsize_t *inode_reserved_space(str
         return inode->i_sb->dq_op->get_reserved_space(inode);
 }
 
+static inline void __inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+        *inode_reserved_space(inode) += number;
+}
+
 void inode_add_rsv_space(struct inode *inode, qsize_t number)
 {
         spin_lock(&inode->i_lock);
-        *inode_reserved_space(inode) += number;
+        __inode_add_rsv_space(inode, number);
         spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(inode_add_rsv_space);
 
-void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+static inline void __inode_claim_rsv_space(struct inode *inode, qsize_t number)
 {
-        spin_lock(&inode->i_lock);
         *inode_reserved_space(inode) -= number;
         __inode_add_bytes(inode, number);
+}
+
+void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+        spin_lock(&inode->i_lock);
+        __inode_claim_rsv_space(inode, number);
         spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(inode_claim_rsv_space);
 
+static inline void __inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+        *inode_reserved_space(inode) -= number;
+}
+
 void inode_sub_rsv_space(struct inode *inode, qsize_t number)
 {
         spin_lock(&inode->i_lock);
-        *inode_reserved_space(inode) -= number;
+        __inode_sub_rsv_space(inode, number);
         spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(inode_sub_rsv_space);
@@ -1504,9 +1507,8 @@ static qsize_t inode_get_rsv_space(struc
 
         if (!inode->i_sb->dq_op->get_reserved_space)
                 return 0;
-        spin_lock(&inode->i_lock);
+
         ret = *inode_reserved_space(inode);
-        spin_unlock(&inode->i_lock);
         return ret;
 }
@@ -1514,17 +1516,17 @@ static void inode_incr_space(struct inod
                 int reserve)
 {
         if (reserve)
-                inode_add_rsv_space(inode, number);
+                __inode_add_rsv_space(inode, number);
         else
-                inode_add_bytes(inode, number);
+                __inode_add_bytes(inode, number);
 }
 
 static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
 {
         if (reserve)
-                inode_sub_rsv_space(inode, number);
+                __inode_sub_rsv_space(inode, number);
         else
-                inode_sub_bytes(inode, number);
+                __inode_sub_bytes(inode, number);
 }
 
 /*
@@ -1547,6 +1549,7 @@ int __dquot_alloc_space(struct inode *in
         int warn = flags & DQUOT_SPACE_WARN;
         int reserve = flags & DQUOT_SPACE_RESERVE;
         int nofail = flags & DQUOT_SPACE_NOFAIL;
+        struct dquot *dquot[MAXQUOTAS] = { NULL };
 
         /*
          * First test before acquiring mutex - solves deadlocks when we
@@ -1557,15 +1560,17 @@ int __dquot_alloc_space(struct inode *in
                 goto out;
         }
 
-        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_lock(&inode->i_lock);
         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                 warntype[cnt] = QUOTA_NL_NOWARN;
 
         spin_lock(&dq_data_lock);
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                if (!inode->i_dquot[cnt])
+                dquot[cnt] = inode->i_dquot[cnt];
+                if (!dquot[cnt])
                         continue;
-                ret = check_bdq(inode->i_dquot[cnt], number, !warn,
+                atomic_inc(&dquot[cnt]->dq_count);
+                ret = check_bdq(dquot[cnt], number, !warn,
                                 warntype+cnt);
                 if (ret && !nofail) {
                         spin_unlock(&dq_data_lock);
@@ -1573,22 +1578,23 @@ int __dquot_alloc_space(struct inode *in
                 }
         }
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                if (!inode->i_dquot[cnt])
+                if (!dquot[cnt])
                         continue;
                 if (reserve)
-                        dquot_resv_space(inode->i_dquot[cnt], number);
+                        dquot_resv_space(dquot[cnt], number);
                 else
-                        dquot_incr_space(inode->i_dquot[cnt], number);
+                        dquot_incr_space(dquot[cnt], number);
         }
         inode_incr_space(inode, number, reserve);
         spin_unlock(&dq_data_lock);
+        spin_unlock(&inode->i_lock);
 
         if (reserve)
                 goto out_flush_warn;
-        mark_all_dquot_dirty(inode->i_dquot);
+        mark_all_dquot_dirty(dquot);
 out_flush_warn:
-        flush_warnings(inode->i_dquot, warntype);
-        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        flush_warnings(dquot, warntype);
+        dqput_all(dquot);
 out:
         return ret;
 }
@@ -1601,6 +1607,7 @@ int dquot_alloc_inode(const struct inode
 {
         int cnt, ret = 0;
         char warntype[MAXQUOTAS];
+        struct dquot *dquot[MAXQUOTAS] = { NULL };
 
         /* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1608,28 +1615,33 @@ int dquot_alloc_inode(const struct inode
                 return 0;
         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                 warntype[cnt] = QUOTA_NL_NOWARN;
-        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
+        spin_lock(&((struct inode *)inode)->i_lock);
         spin_lock(&dq_data_lock);
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                if (!inode->i_dquot[cnt])
+                dquot[cnt] = inode->i_dquot[cnt];
+                if (!dquot[cnt])
                         continue;
-                ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
+                atomic_inc(&dquot[cnt]->dq_count);
+                ret = check_idq(dquot[cnt], 1, warntype + cnt);
                 if (ret)
                         goto warn_put_all;
         }
 
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                if (!inode->i_dquot[cnt])
+                if (!dquot[cnt])
                         continue;
-                dquot_incr_inodes(inode->i_dquot[cnt], 1);
+                dquot_incr_inodes(dquot[cnt], 1);
         }
 
 warn_put_all:
         spin_unlock(&dq_data_lock);
+        spin_unlock(&((struct inode *)inode)->i_lock);
+
         if (ret == 0)
-                mark_all_dquot_dirty(inode->i_dquot);
-        flush_warnings(inode->i_dquot, warntype);
-        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+                mark_all_dquot_dirty(dquot);
+        flush_warnings(dquot, warntype);
+        dqput_all(dquot);
         return ret;
 }
 EXPORT_SYMBOL(dquot_alloc_inode);
@@ -1639,6 +1651,7 @@ EXPORT_SYMBOL(dquot_alloc_inode);
  */
 int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
+        struct dquot *dquot[MAXQUOTAS] = { NULL };
         int cnt;
 
         if (!dquot_active(inode)) {
@@ -1646,19 +1659,23 @@ int dquot_claim_space_nodirty(struct ino
                 return 0;
         }
 
-        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_lock(&inode->i_lock);
         spin_lock(&dq_data_lock);
         /* Claim reserved quotas to allocated quotas */
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                if (inode->i_dquot[cnt])
-                        dquot_claim_reserved_space(inode->i_dquot[cnt],
-                                                   number);
+                dquot[cnt] = inode->i_dquot[cnt];
+                if (dquot[cnt]) {
+                        atomic_inc(&dquot[cnt]->dq_count);
+                        dquot_claim_reserved_space(dquot[cnt], number);
+                }
         }
         /* Update inode bytes */
-        inode_claim_rsv_space(inode, number);
+        __inode_claim_rsv_space(inode, number);
         spin_unlock(&dq_data_lock);
-        mark_all_dquot_dirty(inode->i_dquot);
-        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_unlock(&inode->i_lock);
+
+        mark_all_dquot_dirty(dquot);
+        dqput_all(dquot);
         return 0;
 }
 EXPORT_SYMBOL(dquot_claim_space_nodirty);
@@ -1671,6 +1688,7 @@ void __dquot_free_space(struct inode *in
         unsigned int cnt;
         char warntype[MAXQUOTAS];
         int reserve = flags & DQUOT_SPACE_RESERVE;
+        struct dquot *dquot[MAXQUOTAS] = { NULL };
 
         /* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1679,26 +1697,29 @@ void __dquot_free_space(struct inode *in
                 return;
         }
 
-        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_lock(&inode->i_lock);
         spin_lock(&dq_data_lock);
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                if (!inode->i_dquot[cnt])
+                dquot[cnt] = inode->i_dquot[cnt];
+                if (!dquot[cnt])
                         continue;
-                warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
+                atomic_inc(&dquot[cnt]->dq_count);
+                warntype[cnt] = info_bdq_free(dquot[cnt], number);
                 if (reserve)
-                        dquot_free_reserved_space(inode->i_dquot[cnt], number);
+                        dquot_free_reserved_space(dquot[cnt], number);
                 else
-                        dquot_decr_space(inode->i_dquot[cnt], number);
+                        dquot_decr_space(dquot[cnt], number);
         }
         inode_decr_space(inode, number, reserve);
         spin_unlock(&dq_data_lock);
+        spin_unlock(&inode->i_lock);
 
         if (reserve)
                 goto out_unlock;
-        mark_all_dquot_dirty(inode->i_dquot);
+        mark_all_dquot_dirty(dquot);
 out_unlock:
-        flush_warnings(inode->i_dquot, warntype);
-        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        flush_warnings(dquot, warntype);
+        dqput_all(dquot);
 }
 EXPORT_SYMBOL(__dquot_free_space);
@@ -1707,26 +1728,31 @@ EXPORT_SYMBOL(__dquot_free_space);
  */
 void dquot_free_inode(const struct inode *inode)
 {
-        unsigned int cnt;
+        struct dquot *dquot[MAXQUOTAS] = { NULL };
         char warntype[MAXQUOTAS];
+        unsigned int cnt;
 
         /* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
         if (!dquot_active(inode))
                 return;
 
-        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_lock(&((struct inode *)inode)->i_lock);
         spin_lock(&dq_data_lock);
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                if (!inode->i_dquot[cnt])
+                dquot[cnt] = inode->i_dquot[cnt];
+                if (!dquot[cnt])
                         continue;
-                warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
-                dquot_decr_inodes(inode->i_dquot[cnt], 1);
+                atomic_inc(&dquot[cnt]->dq_count);
+                warntype[cnt] = info_idq_free(dquot[cnt], 1);
+                dquot_decr_inodes(dquot[cnt], 1);
         }
         spin_unlock(&dq_data_lock);
-        mark_all_dquot_dirty(inode->i_dquot);
-        flush_warnings(inode->i_dquot, warntype);
-        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_unlock(&((struct inode *)inode)->i_lock);
+
+        mark_all_dquot_dirty(dquot);
+        flush_warnings(dquot, warntype);
+        dqput_all(dquot);
 }
 EXPORT_SYMBOL(dquot_free_inode);
@@ -1757,13 +1783,13 @@ int __dquot_transfer(struct inode *inode
         /* Initialize the arrays */
         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                 warntype_to[cnt] = QUOTA_NL_NOWARN;
-        down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_lock(&inode->i_lock);
         if (IS_NOQUOTA(inode)) {        /* File without quota accounting? */
-                up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+                spin_unlock(&inode->i_lock);
                 return 0;
         }
         spin_lock(&dq_data_lock);
-        cur_space = inode_get_bytes(inode);
+        cur_space = __inode_get_bytes(inode);
         rsv_space = inode_get_rsv_space(inode);
         space = cur_space + rsv_space;
         /* Build the transfer_from list and check the limits */
@@ -1811,7 +1837,7 @@ int __dquot_transfer(struct inode *inode
                 inode->i_dquot[cnt] = transfer_to[cnt];
         }
         spin_unlock(&dq_data_lock);
-        up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_unlock(&inode->i_lock);
 
         mark_all_dquot_dirty(transfer_from);
         mark_all_dquot_dirty(transfer_to);
@@ -1825,7 +1851,7 @@ int __dquot_transfer(struct inode *inode
         return 0;
 over_quota:
         spin_unlock(&dq_data_lock);
-        up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+        spin_unlock(&inode->i_lock);
         flush_warnings(transfer_to, warntype_to);
         return ret;
 }
diff -urp linux-3.0.61-0.orig/fs/quota/quota.c linux-3.0.61-0/fs/quota/quota.c
--- linux-3.0.61-0.orig/fs/quota/quota.c	2013-04-10 15:15:08.000000000 -0400
+++ linux-3.0.61-0/fs/quota/quota.c	2013-04-24 10:27:22.000000000 -0400
@@ -79,13 +79,13 @@ static int quota_getfmt(struct super_blo
 {
         __u32 fmt;
 
-        down_read(&sb_dqopt(sb)->dqptr_sem);
+        mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
         if (!sb_has_quota_active(sb, type)) {
-                up_read(&sb_dqopt(sb)->dqptr_sem);
+                mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
                 return -ESRCH;
         }
         fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
-        up_read(&sb_dqopt(sb)->dqptr_sem);
+        mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
         if (copy_to_user(addr, &fmt, sizeof(fmt)))
                 return -EFAULT;
         return 0;
diff -urp linux-3.0.61-0.orig/fs/stat.c linux-3.0.61-0/fs/stat.c
--- linux-3.0.61-0.orig/fs/stat.c	2013-04-10 15:15:08.000000000 -0400
+++ linux-3.0.61-0/fs/stat.c	2013-04-24 10:27:22.000000000 -0400
@@ -435,9 +435,8 @@ void inode_add_bytes(struct inode
 
 EXPORT_SYMBOL(inode_add_bytes);
 
-void inode_sub_bytes(struct inode *inode, loff_t bytes)
+void __inode_sub_bytes(struct inode *inode, loff_t bytes)
 {
-        spin_lock(&inode->i_lock);
         inode->i_blocks -= bytes >> 9;
         bytes &= 511;
         if (inode->i_bytes < bytes) {
@@ -445,17 +444,28 @@ void inode_sub_bytes(struct inode
                 inode->i_bytes += 512;
         }
         inode->i_bytes -= bytes;
+}
+
+void inode_sub_bytes(struct inode *inode, loff_t bytes)
+{
+        spin_lock(&inode->i_lock);
+        __inode_sub_bytes(inode, bytes);
         spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(inode_sub_bytes);
 
+loff_t __inode_get_bytes(struct inode *inode)
+{
+        return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
+}
+
 loff_t inode_get_bytes(struct inode *inode)
 {
         loff_t ret;
 
         spin_lock(&inode->i_lock);
-        ret = (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
+        ret = __inode_get_bytes(inode);
         spin_unlock(&inode->i_lock);
         return ret;
 }
diff -urp linux-3.0.61-0.orig/fs/super.c linux-3.0.61-0/fs/super.c
--- linux-3.0.61-0.orig/fs/super.c	2013-04-10 15:15:08.000000000 -0400
+++ linux-3.0.61-0/fs/super.c	2013-04-24 10:27:22.000000000 -0400
@@ -108,7 +108,6 @@ static struct super_block *alloc_super(s
                 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
                 mutex_init(&s->s_dquot.dqio_mutex);
                 mutex_init(&s->s_dquot.dqonoff_mutex);
-                init_rwsem(&s->s_dquot.dqptr_sem);
                 init_waitqueue_head(&s->s_wait_unfrozen);
                 s->s_maxbytes = MAX_NON_LFS;
                 s->s_op = &default_op;
diff -urp linux-3.0.61-0.orig/include/linux/fs.h linux-3.0.61-0/include/linux/fs.h
--- linux-3.0.61-0.orig/include/linux/fs.h	2013-04-24 10:27:55.000000000 -0400
+++ linux-3.0.61-0/include/linux/fs.h	2013-04-22 17:42:39.000000000 -0400
@@ -2450,7 +2450,9 @@ extern void generic_fillattr(struct inod
 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
+void __inode_sub_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
+loff_t __inode_get_bytes(struct inode *inode);
 loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
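
Note (not part of the patch above): the converted fast paths in __dquot_alloc_space(), dquot_alloc_inode(), dquot_claim_space_nodirty(), __dquot_free_space() and dquot_free_inode() all follow the same discipline -- pin the dquots with atomic_inc(&dquot[cnt]->dq_count) while inode->i_lock and dq_data_lock are held, drop both spinlocks, and only then call the routines that may sleep or do I/O (mark_all_dquot_dirty(), flush_warnings(), dqput_all()). Because the pinned reference keeps each dquot alive after the spinlocks are released, dqptr_sem is no longer needed to hold the pointers stable. The sketch below is a minimal userspace analogue of that pattern built on pthreads and C11 atomics; the names account, account_pin and account_flush_and_put are invented for illustration and do not exist in the kernel.

/*
 * Illustrative only -- NOT kernel code.  Demonstrates "pin under spinlock,
 * do the slow work after unlock".  Build with: gcc -O2 -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct account {
        atomic_int refcount;    /* plays the role of dquot->dq_count */
        long blocks;            /* counter updated only under "lock" */
};

static pthread_spinlock_t lock; /* plays the role of inode->i_lock */
static struct account shared = { 1, 0 };

/* Caller holds "lock": take a stable reference to the shared object. */
static struct account *account_pin(void)
{
        atomic_fetch_add(&shared.refcount, 1);
        return &shared;
}

/* Slow, possibly blocking work: legal only after "lock" has been dropped. */
static void account_flush_and_put(struct account *a)
{
        usleep(1000);                           /* stands in for quota I/O */
        atomic_fetch_sub(&a->refcount, 1);      /* like dqput_all() */
}

static void *alloc_space(void *arg)
{
        struct account *a;

        (void)arg;
        pthread_spin_lock(&lock);       /* like spin_lock(&inode->i_lock) */
        a = account_pin();              /* like atomic_inc(&dquot->dq_count) */
        a->blocks += 8;                 /* counter update under the spinlock */
        pthread_spin_unlock(&lock);     /* drop the lock before blocking calls */

        account_flush_and_put(a);
        return NULL;
}

int main(void)
{
        pthread_t t[4];
        int i;

        pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
        for (i = 0; i < 4; i++)
                pthread_create(&t[i], NULL, alloc_space, NULL);
        for (i = 0; i < 4; i++)
                pthread_join(t[i], NULL);
        printf("blocks=%ld refcount=%d\n", shared.blocks, (int)shared.refcount);
        return 0;
}

The program prints blocks=32 and refcount=1: every thread's counter update happened under the spinlock, while the slow flush ran with no locks held, kept safe only by the reference taken while the lock was still owned.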