Whamcloud - gitweb
Branch b1_8
[fs/lustre-release.git] / lustre / quota / quota_interface.c
index 35a7f4a..c941cee 100644 (file)
@@ -1,36 +1,53 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  lustre/quota/quota_interface.c
+ * GPL HEADER START
  *
- *  Copyright (c) 2001-2005 Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   No redistribution or use is permitted outside of Cluster File Systems, Inc.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  */
+
 #ifndef EXPORT_SYMTAB
 # define EXPORT_SYMTAB
 #endif
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_LQUOTA
 
 #ifdef __KERNEL__
 # include <linux/version.h>
 # include <linux/module.h>
 # include <linux/init.h>
 # include <linux/fs.h>
-# include <linux/jbd.h>
-# include <linux/ext3_fs.h>
-# include <linux/parser.h>
-# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #  include <linux/smp_lock.h>
 #  include <linux/buffer_head.h>
 #  include <linux/workqueue.h>
 #  include <linux/mount.h>
-# else
-#  include <linux/locks.h>
-# endif
 #else /* __KERNEL__ */
 # include <liblustre.h>
 #endif
 #include <obd_ost.h>
 #include <lustre_fsfilt.h>
 #include <lustre_quota.h>
+#include <lprocfs_status.h>
 #include "quota_internal.h"
 
-
 #ifdef __KERNEL__
-extern unsigned long default_bunit_sz;
-extern unsigned long default_btune_ratio;
-extern unsigned long default_iunit_sz;
-extern unsigned long default_itune_ratio;
-
-enum {
-        Opt_quotaon, Opt_iunit_sz, Opt_bunit_sz,
-        Opt_itune_ratio, Opt_btune_ratio, Opt_err,
-};
-
-static match_table_t tokens = {
-        {Opt_quotaon, "quotaon=%10s"},
-        {Opt_iunit_sz, "iunit=%u"},
-        {Opt_bunit_sz, "bunit=%u"},
-        {Opt_itune_ratio, "itune=%u"},
-        {Opt_btune_ratio, "btune=%u"},
-        {Opt_err, NULL}
-};
-
-static int
-quota_parse_config_args(char *options, int *quotaon, int *type,
-                        struct lustre_quota_ctxt *qctxt)
-{
-        char *opt;
-        substring_t args[MAX_OPT_ARGS];
-        int option;
-        int rc = 0;
-        unsigned long iunit = 0, bunit = 0, itune = 0, btune = 0;
-        ENTRY;
-
-        while ((opt = strsep (&options, ",")) != NULL) {
-                int token;
-                if (!*opt)
-                        continue;
 
-                token = match_token(opt, tokens, args);
-                switch(token) {
-                case Opt_quotaon: {
-                        char *quota_type = match_strdup(&args[0]);
-                        if (!quota_type)
-                                GOTO(out, rc = -EINVAL);
-
-                        *quotaon = 1;
-                        if (strchr(quota_type, 'u') && strchr(quota_type, 'g'))
-                                *type = UGQUOTA;
-                        else if (strchr(quota_type, 'u'))
-                                *type = USRQUOTA;
-                        else if (strchr(quota_type, 'g'))
-                                *type = GRPQUOTA;
-                        else {
-                                *quotaon = 0;
-                                rc = -EINVAL;
-                        }
-                        break;
-                }
-                case Opt_iunit_sz:
-                        if (match_int(&args[0], &option))
-                                rc = -EINVAL;
-                        iunit = option;
-                        break;
-                case Opt_bunit_sz:
-                        if (match_int(&args[0], &option))
-                                rc = -EINVAL;
-                        bunit = option;
-                        break;
-                case Opt_itune_ratio:
-                        if (match_int(&args[0], &option) ||
-                            option <= 0 || option >= 100)
-                                rc = -EINVAL;
-                        itune = option;
-                        break;
-                case Opt_btune_ratio:
-                        if (match_int(&args[0], &option) ||
-                            option <= 0 || option >= 100)
-                                rc = -EINVAL;
-                        btune = option;
-                        break;
-                default:
-                        rc = -EINVAL;
-                }
+#ifdef HAVE_QUOTA_SUPPORT
 
-                if (rc)
-                        GOTO(out, rc);
-        }
-
-        /* adjust the tunables of qunits based on quota config args */
-        if (iunit)
-                qctxt->lqc_iunit_sz = iunit;
-        if (itune)
-                qctxt->lqc_itune_sz = qctxt->lqc_iunit_sz *
-                                      itune / 100;
-        else
-                qctxt->lqc_itune_sz = qctxt->lqc_iunit_sz *
-                                      default_itune_ratio / 100;
-        if (bunit)
-                qctxt->lqc_bunit_sz = bunit << 20;
-        if (btune)
-                qctxt->lqc_btune_sz = ((qctxt->lqc_bunit_sz >> 20) *
-                                        btune / 100) << 20;
-        else
-                qctxt->lqc_btune_sz = ((qctxt->lqc_bunit_sz >> 20) *
-                                        default_btune_ratio / 100) << 20;
-
-        CDEBUG(D_INFO, "iunit=%lu bunit=%lu itune=%lu btune=%lu\n",
-               qctxt->lqc_iunit_sz, qctxt->lqc_bunit_sz,
-               qctxt->lqc_itune_sz, qctxt->lqc_btune_sz);
-        EXIT;
-
- out:
-        if (rc)
-                CERROR("quota config args parse error!(rc = %d) usage: "
-                "--quota quotaon=u|g|ug,iunit=100,bunit=100,itune=50,btune=50\n",
-                 rc);
-
-        return rc;
-}
-
-static int auto_quota_on(struct obd_device *obd, int type,
-                         struct super_block *sb, int is_master)
-{
-        struct obd_quotactl *oqctl;
-        struct lvfs_run_ctxt saved;
-        int rc;
-        ENTRY;
-
-        LASSERT(type == USRQUOTA || type == GRPQUOTA || type == UGQUOTA);
-
-        OBD_ALLOC_PTR(oqctl);
-        if (!oqctl)
-                RETURN(-ENOMEM);
-
-        oqctl->qc_type = type;
-        oqctl->qc_cmd = Q_QUOTAON;
-        oqctl->qc_id = QFMT_LDISKFS;
-
-        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
-        if (!is_master)
-                goto local_quota;
-
-        /* turn on cluster wide quota */
-        rc = mds_admin_quota_on(obd, oqctl);
-        if (rc) {
-                CERROR("auto enable admin quota error! err = %d\n", rc);
-                GOTO(out_pop, rc);
-        }
-local_quota:
-        /* turn on local quota */
-        rc = fsfilt_quotactl(obd, sb, oqctl);
-        CDEBUG_EX(rc ? D_ERROR : D_INFO, "auto-enable quota. rc=%d\n", rc);
-        if (rc && is_master)
-                mds_quota_off(obd, oqctl);
-out_pop:
-        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
-        OBD_FREE_PTR(oqctl);
-        RETURN(rc);
-}
-
-static int mds_auto_quota_on(struct obd_device *obd, int type)
-{
-        int rc;
-        ENTRY;
-        rc = auto_quota_on(obd, type, obd->u.obt.obt_sb, 1);
-        RETURN(rc);
-}
-
-static int filter_auto_quota_on(struct obd_device *obd, int type)
-{
-        int rc = 0;
-        ENTRY;
-        rc = auto_quota_on(obd, type, obd->u.obt.obt_sb, 0);
-        RETURN(rc);
-}
+/* Time of the most recent "still haven't managed to acquire quota" warning
+ * printed by quota_chk_acq_common(); 0 means none has been printed yet. */
+static cfs_time_t last_print = 0;
+/* Serializes the check and update of last_print. */
+static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;
 
-static int filter_quota_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+/*
+ * Set up the quota state kept in obd->u.obt: choose the quota format
+ * version, set obt_quotachecking to 1 and initialize the quota context
+ * through qctxt_init() with a NULL handler.
+ *
+ * Returns the qctxt_init() result; a non-zero result is logged as well.
+ */
+static int filter_quota_setup(struct obd_device *obd)
 {
         int rc = 0;
         struct obd_device_target *obt = &obd->u.obt;
         ENTRY;
 
+        /* the quota format version follows the HAVE_QUOTA64 build option */
+#ifdef HAVE_QUOTA64
+        obt->obt_qfmt = LUSTRE_QUOTA_V2;
+#else
+        obt->obt_qfmt = LUSTRE_QUOTA_V1;
+#endif
         atomic_set(&obt->obt_quotachecking, 1);
-        rc = qctxt_init(&obt->obt_qctxt, obt->obt_sb, NULL);
-        if (rc) {
+        rc = qctxt_init(obd, NULL);
+        if (rc)
                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
-                RETURN(rc);
-        }
-
-        /* Based on quota config args, set qunit sizes and enable quota */
-        if (LUSTRE_CFG_BUFLEN(lcfg, 5) > 0 && lustre_cfg_buf(lcfg, 5)) {
-                char *args = lustre_cfg_string(lcfg, 5);
-                int quotaon = 0, type;
-                int err = 0;
-
-                err = quota_parse_config_args(args, &quotaon, &type,
-                                              &obd->u.obt.obt_qctxt);
-                if (!err && quotaon)
-                        filter_auto_quota_on(obd, type);
-        }
 
         RETURN(rc);
 }
@@ -257,46 +96,120 @@ static int filter_quota_cleanup(struct obd_device *obd)
 
+/*
+ * Attach the quota context of @obd to the reverse import of @exp.
+ *
+ * Stores exp->exp_imp_reverse in lqc_import under lqc_lock, copies the
+ * export's OBD_CONNECT_QUOTA64/OBD_CONNECT_CHANGE_QS flags onto that import
+ * (when it is non-NULL), signals lqc_wait_for_qmaster and starts quota
+ * slave recovery. Always returns 0.
+ */
 static int filter_quota_setinfo(struct obd_export *exp, struct obd_device *obd)
 {
+        struct obd_import *imp;
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        ENTRY;
+
         /* setup the quota context import */
+        spin_lock(&obd->u.obt.obt_qctxt.lqc_lock);
         obd->u.obt.obt_qctxt.lqc_import = exp->exp_imp_reverse;
+        spin_unlock(&obd->u.obt.obt_qctxt.lqc_lock);
+        CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated now, \n",
+               obd->obd_name,exp->exp_imp_reverse, obd);
+
+        /* mirror the export's quota-related connect flags on the reverse
+         * import, so they can be read from the import without scanning
+         * the export list
+         */
+        imp = exp->exp_imp_reverse;
+        if (imp)
+                imp->imp_connect_data.ocd_connect_flags |=
+                        (exp->exp_connect_flags &
+                         (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
+
+        /* wake up threads sleeping on lqc_wait_for_qmaster in
+         * quota_chk_acq_common() */
+        cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
         /* start quota slave recovery thread. (release high limits) */
         qslave_start_recovery(obd, &obd->u.obt.obt_qctxt);
-        return 0;
+        RETURN(0);
 }
+
+/*
+ * Detach the quota context of @obd from the reverse import of @exp.
+ *
+ * Does nothing when @obd is not set up or when lqc_import does not refer
+ * to exp->exp_imp_reverse. Otherwise lqc_import is cleared under lqc_lock,
+ * the reverse import is cleaned up and dqacq_interrupt() is called on the
+ * quota context. Always returns 0.
+ */
+static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
+{
+        struct lustre_quota_ctxt *lqc = &obd->u.obt.obt_qctxt;
+        ENTRY;
+
+        /* b=14896: the export may be destroyed before lquota is set up */
+        if (!obd->obd_set_up)
+                RETURN(0);
+
+        /* the context is not bound to this export's reverse import */
+        if (lqc->lqc_import == NULL ||
+            lqc->lqc_import != exp->exp_imp_reverse)
+                RETURN(0);
+
+        /* b=12374: exp->exp_imp_reverse is being destroyed, so the matching
+         * lqc_import has to be invalidated */
+        spin_lock(&lqc->lqc_lock);
+        lqc->lqc_import = NULL;
+        spin_unlock(&lqc->lqc_lock);
+
+        ptlrpc_cleanup_imp(exp->exp_imp_reverse);
+        dqacq_interrupt(lqc);
+        CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n",
+               obd->obd_name, obd);
+
+        RETURN(0);
+}
+
+/*
+ * Switch quota enforcement for the caller on or off.
+ *
+ * When any quota is active on the target's superblock, a non-zero @ignore
+ * raises CFS_CAP_SYS_RESOURCE through cfs_cap_raise() and a zero @ignore
+ * lowers it through cfs_cap_lower(). Without active quota nothing is
+ * changed. Always returns 0.
+ */
 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
 {
         ENTRY;
 
-        if (!sb_any_quota_enabled(obd->u.obt.obt_sb))
+        if (!ll_sb_any_quota_active(obd->u.obt.obt_sb))
                 RETURN(0);
 
-        if (ignore)
-                cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
-        else
-                cap_lower(current->cap_effective, CAP_SYS_RESOURCE);
+        if (ignore) {
+                CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
+                cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+        } else {
+                cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+        }
 
         RETURN(0);
 }
 
+/* Select the id of @oa that matches quota type @flag: o_uid for USRQUOTA,
+ * o_gid otherwise. Both arguments are parenthesized so that expression
+ * arguments expand with the intended precedence. */
+#define GET_OA_ID(flag, oa) ((flag) == USRQUOTA ? (oa)->o_uid : (oa)->o_gid)
 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
 {
         struct obd_device_target *obt = &obd->u.obt;
+        struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
         int err, cnt, rc = 0;
         struct obd_quotactl *oqctl;
         ENTRY;
 
-        if (!sb_any_quota_enabled(obt->obt_sb))
+        if (!ll_sb_any_quota_active(obt->obt_sb))
                 RETURN(0);
 
-        oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
-
         OBD_ALLOC_PTR(oqctl);
         if (!oqctl) {
                 CERROR("Not enough memory!");
                 RETURN(-ENOMEM);
         }
 
+        /* set over quota flags for a uid/gid */
+        oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
+        oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
+
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+                struct lustre_qunit_size *lqs = NULL;
+
+                lqs = quota_search_lqs(LQS_KEY(cnt, GET_OA_ID(cnt, oa)),
+                                       qctxt, 0);
+                if (lqs == NULL || IS_ERR(lqs)) {
+                        rc = PTR_ERR(lqs);
+                        break;
+                } else {
+                        spin_lock(&lqs->lqs_lock);
+                        if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
+                                oa->o_flags |= (cnt == USRQUOTA) ?
+                                        OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
+                                CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
+                                       "sync_blk(%d)\n", lqs->lqs_bunit_sz,
+                                       qctxt->lqc_sync_blk);
+                                spin_unlock(&lqs->lqs_lock);
+                                /* this is for quota_search_lqs */
+                                lqs_putref(lqs);
+                                continue;
+                        }
+                        spin_unlock(&lqs->lqs_lock);
+                        /* this is for quota_search_lqs */
+                        lqs_putref(lqs);
+                }
+
                 memset(oqctl, 0, sizeof(*oqctl));
 
                 oqctl->qc_cmd = Q_GETQUOTA;
@@ -306,14 +219,13 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
                 if (err) {
                         if (!rc)
                                 rc = err;
+                        oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
+                                                             OBD_MD_FLGRPQUOTA);
                         continue;
                 }
 
-                /* set over quota flags for a uid/gid */
-                oa->o_valid |= (cnt == USRQUOTA) ?
-                               OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA;
                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
-                   (toqb(oqctl->qc_dqblk.dqb_curspace) >
+                   (toqb(oqctl->qc_dqblk.dqb_curspace) >=
                     oqctl->qc_dqblk.dqb_bhardlimit))
                         oa->o_flags |= (cnt == USRQUOTA) ?
                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
@@ -323,14 +235,348 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
 }
 
+/*
+ * Block-quota acquire callback: calls qctxt_adjust_qunit() with
+ * LQUOTA_FLAGS_BLK for @uid/@gid, passing @oti through.
+ *
+ * Returns the qctxt_adjust_qunit() result unchanged (the previous version
+ * returned "rc == -EAGAIN" instead).
+ */
 static int filter_quota_acquire(struct obd_device *obd, unsigned int uid,
-                                unsigned int gid)
+                                unsigned int gid, struct obd_trans_info *oti)
 {
         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
         int rc;
         ENTRY;
 
-        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 1);
-        RETURN(rc == -EAGAIN);
+        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, LQUOTA_FLAGS_BLK, 1, oti);
+        RETURN(rc);
+}
+
+/*
+ * Check whether the remaining quota of @uid and @gid can satisfy a
+ * block-write (@isblk != 0) or inode-create (@isblk == 0) request.
+ *
+ * @count:   pages to write (block case, scaled by CFS_PAGE_SIZE) or inodes
+ *           to create
+ * @cycle:   0 on the first pass for a request; only then pending[] is
+ *           computed and added to the lqs pending counters, and an extra
+ *           lqs reference is taken that quota_pending_commit() drops
+ * @inode, @frags: handed to fsfilt_get_mblk() to get the extra meta blocks
+ *           of a block write; asserted valid on the first block pass
+ * @pending: per-id pending amount, index 0 for @uid and 1 for @gid
+ *
+ * Returns QUOTA_RET_ACQUOTA when quota has to be acquired for either id,
+ * 0 otherwise (including when the quota context is not valid).
+ */
+static int quota_check_common(struct obd_device *obd, unsigned int uid,
+                              unsigned int gid, int count, int cycle, int isblk,
+                              struct inode *inode, int frags, int pending[2])
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        int i;
+        __u32 id[MAXQUOTAS] = { uid, gid };
+        struct qunit_data qdata[MAXQUOTAS];
+        int mb = 0;
+        int rc = 0, rc2[2] = { 0, 0 };
+        ENTRY;
+
+        /* nothing to check while the quota context is not valid */
+        spin_lock(&qctxt->lqc_lock);
+        if (!qctxt->lqc_valid){
+                spin_unlock(&qctxt->lqc_lock);
+                RETURN(rc);
+        }
+        spin_unlock(&qctxt->lqc_lock);
+
+        for (i = 0; i < MAXQUOTAS; i++) {
+                struct lustre_qunit_size *lqs = NULL;
+
+                qdata[i].qd_id = id[i];
+                qdata[i].qd_flags = i;
+                if (isblk)
+                        QDATA_SET_BLK(&qdata[i]);
+                qdata[i].qd_count = 0;
+
+                /* ignore root user */
+                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
+                        continue;
+
+                /* ids without an lqs entry are skipped */
+                lqs = quota_search_lqs(LQS_KEY(i, id[i]), qctxt, 0);
+                if (lqs == NULL || IS_ERR(lqs))
+                        continue;
+
+                /* presumably fills qdata[i].qd_count with the remaining
+                 * quota - TODO confirm against compute_remquota() */
+                rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
+                spin_lock(&lqs->lqs_lock);
+                if (!cycle) {
+                        if (isblk) {
+                                pending[i] = count * CFS_PAGE_SIZE;
+                                /* in order to complete this write, we need extra
+                                 * meta blocks. This function can get it through
+                                 * data needed to be written b=16542 */
+                                mb = pending[i];
+                                LASSERT(inode && frags > 0);
+                                if (fsfilt_get_mblk(obd, qctxt->lqc_sb, &mb,
+                                                    inode, frags) < 0)
+                                        CDEBUG(D_ERROR,
+                                               "can't get extra meta blocks.\n");
+                                else
+                                        pending[i] += mb;
+                                lqs->lqs_bwrite_pending += pending[i];
+                        } else {
+                                pending[i] = count;
+                                lqs->lqs_iwrite_pending += pending[i];
+                        }
+                }
+
+                /* if xx_rec < 0, quota is being released, and the release
+                 * may complete before we use the quota. So in this
+                 * situation we assume it has already been returned
+                 * b=18491 */
+                if (isblk && lqs->lqs_blk_rec < 0) {
+                        if (qdata[i].qd_count < -lqs->lqs_blk_rec)
+                                qdata[i].qd_count = 0;
+                        else
+                                qdata[i].qd_count += lqs->lqs_blk_rec;
+                }
+                if (!isblk && lqs->lqs_ino_rec < 0) {
+                        if (qdata[i].qd_count < -lqs->lqs_ino_rec)
+                                qdata[i].qd_count = 0;
+                        else
+                                qdata[i].qd_count += lqs->lqs_ino_rec;
+                }
+
+                CDEBUG(D_QUOTA, "count=%d lqs_pending=%lu qd_count="LPU64
+                       " isblk=%d mb=%d pending[%d]=%d\n", count,
+                       isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
+                       qdata[i].qd_count, isblk, mb, i, pending[i]);
+                /* acquire more when the adjusted remaining quota does not
+                 * cover everything that is pending for this id */
+                if (rc2[i] == QUOTA_RET_OK) {
+                        if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
+                                rc2[i] = QUOTA_RET_ACQUOTA;
+                        if (!isblk && qdata[i].qd_count <
+                            lqs->lqs_iwrite_pending)
+                                rc2[i] = QUOTA_RET_ACQUOTA;
+                }
+
+                spin_unlock(&lqs->lqs_lock);
+
+                /* NOTE(review): the lqs fields below are read after
+                 * lqs_lock was dropped and regardless of isblk; this only
+                 * feeds the OBD_FAIL_QUOTA_DELAY_REL fail point - confirm
+                 * that is intended */
+                if (lqs->lqs_blk_rec  < 0 &&
+                    qdata[i].qd_count <
+                    lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
+                        OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);
+
+                /* When cycle is zero, lqs_*_pending will be changed. We will
+                 * get reference of the lqs here and put reference of lqs in
+                 * quota_pending_commit b=14784 */
+                if (!cycle)
+                        lqs_getref(lqs);
+
+                /* this is for quota_search_lqs */
+                lqs_putref(lqs);
+        }
+
+        if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
+                RETURN(QUOTA_RET_ACQUOTA);
+        else
+                RETURN(rc);
+}
+
+/*
+ * Check quota for a request and keep acquiring until the check passes.
+ *
+ * Runs quota_check_common() in a loop; while it reports QUOTA_RET_ACQUOTA
+ * the @acquire callback is invoked and the check is repeated:
+ *  - QUOTA_REQ_RETURNED from @acquire: re-check immediately;
+ *  - -EDQUOT: stop and return -EDQUOT;
+ *  - any other negative value: sleep min(cycle, 10) seconds, then re-check.
+ * If the quota context has no import and @oti is given, the thread first
+ * sleeps, with its service watchdog disabled, until check_qm() holds.
+ *
+ * @count:   pages (block case) or inodes requested
+ * @pending: filled by quota_check_common() on the first cycle
+ * @isblk:   non-zero for block quota, 0 for inode quota
+ * @acquire: callback used to acquire quota for @uid/@gid
+ * @oti:     may be NULL; used to reach the service thread's watchdog
+ * @inode, @frags: passed through to quota_check_common()
+ *
+ * The time spent here is added to the LQUOTA_WAIT_FOR_CHK_BLK or
+ * LQUOTA_WAIT_FOR_CHK_INO counter of the quota context.
+ *
+ * Returns 0 once the check no longer asks for more quota, or -EDQUOT.
+ */
+static int quota_chk_acq_common(struct obd_device *obd, unsigned int uid,
+                                unsigned int gid, int count, int pending[2],
+                                int isblk, quota_acquire acquire,
+                                struct obd_trans_info *oti, struct inode *inode,
+                                int frags)
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
+        struct l_wait_info lwi = { 0 };
+        int rc = 0, cycle = 0, count_err = 1;
+        ENTRY;
+
+        CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
+        /* Unfortunately, if quota master is too busy to handle the
+         * pre-dqacq in time and quota hash on ost is used up, we
+         * have to wait for the completion of in flight dqacq/dqrel,
+         * in order to get enough quota for write b=12588 */
+        do_gettimeofday(&work_start);
+        while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk,
+                                        inode, frags, pending)) & QUOTA_RET_ACQUOTA) {
+
+                spin_lock(&qctxt->lqc_lock);
+                if (!qctxt->lqc_import && oti) {
+                        spin_unlock(&qctxt->lqc_lock);
+
+                        LASSERT(oti && oti->oti_thread &&
+                                oti->oti_thread->t_watchdog);
+
+                        /* the sleep below has no timeout, so keep the
+                         * service watchdog quiet until we are woken up */
+                        lc_watchdog_disable(oti->oti_thread->t_watchdog);
+                        CDEBUG(D_QUOTA, "sleep for quota master\n");
+                        l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
+                                     &lwi);
+                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
+                        lc_watchdog_touch(oti->oti_thread->t_watchdog,
+                                 GET_TIMEOUT(oti->oti_thread->t_svc));
+                } else {
+                        spin_unlock(&qctxt->lqc_lock);
+                }
+
+                cycle++;
+                if (isblk)
+                        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
+                /* after acquire(), we should run quota_check_common again
+                 * so that we confirm there are enough quota to finish write */
+                rc = acquire(obd, uid, gid, oti);
+
+                /* please reference to dqacq_completion for the below */
+                /* a new request is finished, try again */
+                if (rc == QUOTA_REQ_RETURNED) {
+                        CDEBUG(D_QUOTA, "finish a quota req, try again\n");
+                        continue;
+                }
+
+                /* it is out of quota already */
+                if (rc == -EDQUOT) {
+                        CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
+                        break;
+                }
+
+                /* -EBUSY and others, wait a second and try again */
+                if (rc < 0) {
+                        cfs_waitq_t        waitq;
+                        /* named apart from the function-level lwi so that
+                         * it does not shadow it */
+                        struct l_wait_info lwi_retry;
+
+                        if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
+                                lc_watchdog_touch(oti->oti_thread->t_watchdog,
+                                         GET_TIMEOUT(oti->oti_thread->t_svc));
+                        CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
+                               count_err);
+                        /* count the failure outside of CDEBUG(): a side
+                         * effect in its argument list only happens when the
+                         * macro evaluates its arguments, which would make
+                         * the error count in the warning below depend on
+                         * the debug settings */
+                        count_err++;
+
+                        /* private wait queue with a false condition: this
+                         * is a plain sleep of min(cycle, 10) seconds */
+                        init_waitqueue_head(&waitq);
+                        lwi_retry = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)),
+                                                NULL, NULL);
+                        l_wait_event(waitq, 0, &lwi_retry);
+                }
+
+                /* warn on errors and on every 10th cycle, but not more
+                 * often than once per 30 seconds over all callers */
+                if (rc < 0 || cycle % 10 == 0) {
+                        spin_lock(&last_print_lock);
+                        if (last_print == 0 ||
+                            cfs_time_before((last_print + cfs_time_seconds(30)),
+                                            cfs_time_current())) {
+                                CWARN("still haven't managed to acquire quota "
+                                      "space from the quota master after %d "
+                                      "retries (err=%d, rc=%d)\n",
+                                      cycle, count_err - 1, rc);
+                                last_print = cfs_time_current();
+                        }
+                        spin_unlock(&last_print_lock);
+                }
+
+                CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
+                       cycle);
+        }
+
+        do_gettimeofday(&work_end);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+        lprocfs_counter_add(qctxt->lqc_stats,
+                            isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
+                                    LQUOTA_WAIT_FOR_CHK_INO,
+                            timediff);
+
+        RETURN(rc);
+}
+
+/*
+ * Tell whether @flag is set in lqs_flags of the lqs entry of @uid or of
+ * @gid.
+ *
+ * Returns 0 when no quota is active on the quota context's superblock or
+ * when neither id has an lqs entry with @flag set, 1 otherwise.
+ */
+int quota_is_set(struct obd_device *obd, unsigned int uid,
+                 unsigned int gid, int flag)
+{
+        struct lustre_qunit_size *lqs;
+        __u32 id[MAXQUOTAS] = { uid, gid };
+        int i, q_set = 0;
+        /* pairs with the RETURN() on both exit paths below */
+        ENTRY;
+
+        if (!ll_sb_any_quota_active(obd->u.obt.obt_qctxt.lqc_sb))
+                RETURN(0);
+
+        for (i = 0; i < MAXQUOTAS; i++) {
+                lqs = quota_search_lqs(LQS_KEY(i, id[i]),
+                                       &obd->u.obt.obt_qctxt, 0);
+                if (lqs && !IS_ERR(lqs)) {
+                        if (lqs->lqs_flags & flag)
+                                q_set = 1;
+                        /* drop the reference taken by quota_search_lqs */
+                        lqs_putref(lqs);
+                }
+        }
+
+        RETURN(q_set);
+}
+
+/*
+ * Block-quota check for a write of @npage pages: when QB_SET is flagged
+ * for @uid or @gid, run the common check/acquire loop with
+ * LQUOTA_FLAGS_BLK and return its result; otherwise return 0 without
+ * checking anything.
+ */
+static int filter_quota_check(struct obd_device *obd, unsigned int uid,
+                              unsigned int gid, int npage, int pending[2],
+                              quota_acquire acquire, struct obd_trans_info *oti,
+                              struct inode *inode, int frags)
+{
+        /* QB_SET is not flagged for either id: nothing to check */
+        if (!quota_is_set(obd, uid, gid, QB_SET))
+                return 0;
+
+        return quota_chk_acq_common(obd, uid, gid, npage, pending,
+                                    LQUOTA_FLAGS_BLK, acquire, oti, inode,
+                                    frags);
+}
+
+/*
+ * When a block_write or inode_create rpc is finished, take the amounts
+ * recorded in @pending back out of the pending counters of the lqs entries
+ * of @uid (index 0) and @gid (index 1).
+ *
+ * @isblk selects lqs_bwrite_pending (non-zero) or lqs_iwrite_pending (0).
+ * A counter smaller than pending[i] is left untouched and an error is
+ * logged. The time spent is added to the LQUOTA_WAIT_FOR_COMMIT_BLK or
+ * LQUOTA_WAIT_FOR_COMMIT_INO counter. Always returns 0.
+ */
+static int quota_pending_commit(struct obd_device *obd, unsigned int uid,
+                                unsigned int gid, int pending[2], int isblk)
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
+        int i;
+        __u32 id[MAXQUOTAS] = { uid, gid };
+        struct qunit_data qdata[MAXQUOTAS];
+        ENTRY;
+
+        CDEBUG(D_QUOTA, "%s: commit pending quota\n", obd->obd_name);
+        CLASSERT(MAXQUOTAS < 4);
+
+        do_gettimeofday(&work_start);
+        for (i = 0; i < MAXQUOTAS; i++) {
+                struct lustre_qunit_size *lqs = NULL;
+                /* set once the pending counter was really decreased */
+                int flag = 0;
+
+                qdata[i].qd_id = id[i];
+                qdata[i].qd_flags = i;
+                if (isblk)
+                        QDATA_SET_BLK(&qdata[i]);
+                qdata[i].qd_count = 0;
+
+                /* ignore root user, as quota_check_common() does */
+                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
+                        continue;
+
+                lqs = quota_search_lqs(LQS_KEY(i, qdata[i].qd_id), qctxt, 0);
+                if (lqs == NULL || IS_ERR(lqs))
+                        continue;
+
+                spin_lock(&lqs->lqs_lock);
+                if (isblk) {
+                        if (lqs->lqs_bwrite_pending >= pending[i]) {
+                                lqs->lqs_bwrite_pending -= pending[i];
+                                flag = 1;
+                        } else {
+                                CERROR("%s: there are too many blocks!\n",
+                                       obd->obd_name);
+                        }
+                } else {
+                        if (lqs->lqs_iwrite_pending >= pending[i]) {
+                                lqs->lqs_iwrite_pending -= pending[i];
+                                flag = 1;
+                        } else {
+                                CERROR("%s: there are too many files!\n",
+                                       obd->obd_name);
+                        }
+                }
+                CDEBUG(D_QUOTA, "%s: lqs_pending=%lu pending[%d]=%d isblk=%d\n",
+                       obd->obd_name,
+                       isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
+                       i, pending[i], isblk);
+
+                spin_unlock(&lqs->lqs_lock);
+                /* this is for quota_search_lqs above */
+                lqs_putref(lqs);
+                /* When lqs_*_pending is changed back, we also drop the
+                 * reference taken in quota_check_common() on its first
+                 * cycle b=14784 */
+                if (flag)
+                        lqs_putref(lqs);
+        }
+        do_gettimeofday(&work_end);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+        lprocfs_counter_add(qctxt->lqc_stats,
+                            isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
+                                    LQUOTA_WAIT_FOR_COMMIT_INO,
+                            timediff);
+
+        RETURN(0);
+}
+
+/*
+ * Block-quota variant of quota_pending_commit(): commits @pending for
+ * @uid/@gid with LQUOTA_FLAGS_BLK and returns that call's result.
+ */
+static int filter_quota_pending_commit(struct obd_device *obd, unsigned int uid,
+                                       unsigned int gid, int pending[2])
+{
+        int rc;
+
+        rc = quota_pending_commit(obd, uid, gid, pending, LQUOTA_FLAGS_BLK);
+        return rc;
 }
 
 static int mds_quota_init(void)
@@ -344,33 +590,27 @@ static int mds_quota_exit(void)
         return 0;
 }
 
-static int mds_quota_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+/*
+ * Set up quota on an MDS device: choose the quota format version, set
+ * obt_quotachecking to 1, initialize mds_qonoff_sem and initialize the
+ * quota context with dqacq_handler as its handler.
+ *
+ * Returns the qctxt_init() result; a non-zero result is logged as well.
+ */
+static int mds_quota_setup(struct obd_device *obd)
 {
         struct obd_device_target *obt = &obd->u.obt;
         struct mds_obd *mds = &obd->u.mds;
         int rc;
         ENTRY;
 
+        /* the quota format version follows the HAVE_QUOTA64 build option */
+#ifdef HAVE_QUOTA64
+        obt->obt_qfmt = LUSTRE_QUOTA_V2;
+#else
+        obt->obt_qfmt = LUSTRE_QUOTA_V1;
+#endif
+        /* NOTE(review): qi_version is set to V2 regardless of HAVE_QUOTA64,
+         * unlike obt_qfmt above - confirm this is intended */
+        mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
         atomic_set(&obt->obt_quotachecking, 1);
         /* initialize quota master and quota context */
         sema_init(&mds->mds_qonoff_sem, 1);
-        rc = qctxt_init(&obt->obt_qctxt, obt->obt_sb, dqacq_handler);
+        rc = qctxt_init(obd, dqacq_handler);
         if (rc) {
                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
                 RETURN(rc);
         }
-
-        /* Based on quota config args, set qunit sizes and enable quota */
-        if (LUSTRE_CFG_BUFLEN(lcfg, 5) > 0 && lustre_cfg_buf(lcfg, 5)) {
-                char *args = lustre_cfg_string(lcfg, 5);
-                int quotaon = 0, type;
-                int err;
-
-                err = quota_parse_config_args(args, &quotaon, &type,
-                                              &obt->obt_qctxt);
-                if (!err && quotaon)
-                        mds_auto_quota_on(obd, type);
-        }
         RETURN(rc);
 }
 
@@ -383,20 +623,45 @@ static int mds_quota_cleanup(struct obd_device *obd)
 static int mds_quota_fs_cleanup(struct obd_device *obd)
 {
         struct mds_obd *mds = &obd->u.mds;
-        int i;
+        struct obd_quotactl oqctl; /* request handed to mds_admin_quota_off() below */
         ENTRY;
 
-        /* close admin quota files */
+        memset(&oqctl, 0, sizeof(oqctl));
+        oqctl.qc_type = UGQUOTA; /* presumably "both user and group quota" - confirm against the UGQUOTA definition */
+
         down(&mds->mds_qonoff_sem);
-        for (i = 0; i < MAXQUOTAS; i++) {
-                if (mds->mds_quota_info.qi_files[i]) {
-                        filp_close(mds->mds_quota_info.qi_files[i], 0);
-                        mds->mds_quota_info.qi_files[i] = NULL;
-                }
-        }
+        mds_admin_quota_off(obd, &oqctl); /* called with mds_qonoff_sem held; its return value is ignored and 0 is returned regardless */
         up(&mds->mds_qonoff_sem);
         RETURN(0);
 }
+
+static int mds_quota_check(struct obd_device *obd, unsigned int uid,
+                           unsigned int gid, int inodes, int pending[2],
+                           quota_acquire acquire, struct obd_trans_info *oti,
+                           struct inode *inode, int frags)
+{       /* returns 0 without further work unless quota_is_set(obd, uid, gid, QI_SET) is true */
+        return quota_is_set(obd, uid, gid, QI_SET) ?
+                quota_chk_acq_common(obd, uid, gid, inodes, pending, 0, /* literal 0: presumably selects the non-block (inode) path - confirm */
+                                     acquire, oti, inode, frags) : 0;
+}
+
+static int mds_quota_acquire(struct obd_device *obd, unsigned int uid,
+                             unsigned int gid, struct obd_trans_info *oti)
+{       /* thin wrapper: returns whatever qctxt_adjust_qunit() returns for this obd's quota context */
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        int rc;
+        ENTRY;
+
+        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 0, 1, oti); /* NOTE(review): meaning of the literal 0 and 1 is not visible here - see qctxt_adjust_qunit() */
+        RETURN(rc);
+}
+
+static int mds_quota_pending_commit(struct obd_device *obd, unsigned int uid,
+                                    unsigned int gid, int pending[2])
+{       /* mds hook: forwards to quota_pending_commit() with flag 0 (the filter variant passes LQUOTA_FLAGS_BLK) */
+        return quota_pending_commit(obd, uid, gid, pending, 0);
+}
+#endif /* HAVE_QUOTA_SUPPORT */
 #endif /* __KERNEL__ */
 
 struct osc_quota_info {
@@ -412,9 +677,10 @@ static struct list_head qinfo_hash[NR_DQHASH];
 /* SLAB cache for client quota context */
 cfs_mem_cache_t *qinfo_cachep = NULL;
 
-static inline int const hashfn(struct client_obd *cli,
-                               unsigned long id,
-                               int type)
+static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
+                         __attribute__((__const__)); /* GCC const attribute, attached to a separate prototype placed ahead of the definition */
+
+static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
 {
         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
@@ -464,7 +730,7 @@ static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
         if(!oqi)
                 RETURN(NULL);
 
-        INIT_LIST_HEAD(&oqi->oqi_hash);
+        CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
         oqi->oqi_cli = cli;
         oqi->oqi_id = id;
         oqi->oqi_type = type;
@@ -477,8 +743,7 @@ static void free_qinfo(struct osc_quota_info *oqi)
         OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
 }
 
-int osc_quota_chkdq(struct client_obd *cli,
-                    unsigned int uid, unsigned int gid)
+int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid)
 {
         unsigned int id;
         int cnt, rc = QUOTA_OK;
@@ -500,8 +765,7 @@ int osc_quota_chkdq(struct client_obd *cli,
         RETURN(rc);
 }
 
-int osc_quota_setdq(struct client_obd *cli,
-                    unsigned int uid, unsigned int gid,
+int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid,
                     obd_flag valid, obd_flag flags)
 {
         unsigned int id;
@@ -575,13 +839,13 @@ int osc_quota_init(void)
 
         LASSERT(qinfo_cachep == NULL);
         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
-                                         sizeof(struct osc_quota_info),
-                                         0, 0);
+                                            sizeof(struct osc_quota_info),
+                                            0, 0);
         if (!qinfo_cachep)
                 RETURN(-ENOMEM);
 
         for (i = 0; i < NR_DQHASH; i++)
-                INIT_LIST_HEAD(qinfo_hash + i);
+                CFS_INIT_LIST_HEAD(qinfo_hash + i);
 
         RETURN(0);
 }
@@ -609,6 +873,7 @@ int osc_quota_exit(void)
 }
 
 #ifdef __KERNEL__
+#ifdef HAVE_QUOTA_SUPPORT
 quota_interface_t mds_quota_interface = {
         .quota_init     = mds_quota_init,
         .quota_exit     = mds_quota_exit,
@@ -619,6 +884,9 @@ quota_interface_t mds_quota_interface = {
         .quota_fs_cleanup       =mds_quota_fs_cleanup,
         .quota_recovery = mds_quota_recovery,
         .quota_adjust   = mds_quota_adjust,
+        .quota_chkquota = mds_quota_check,
+        .quota_acquire  = mds_quota_acquire,
+        .quota_pending_commit = mds_quota_pending_commit,
 };
 
 quota_interface_t filter_quota_interface = {
@@ -627,11 +895,16 @@ quota_interface_t filter_quota_interface = {
         .quota_check    = target_quota_check,
         .quota_ctl      = filter_quota_ctl,
         .quota_setinfo  = filter_quota_setinfo,
+        .quota_clearinfo = filter_quota_clearinfo,
         .quota_enforce  = filter_quota_enforce,
         .quota_getflag  = filter_quota_getflag,
         .quota_acquire  = filter_quota_acquire,
         .quota_adjust   = filter_quota_adjust,
+        .quota_chkquota = filter_quota_check,
+        .quota_adjust_qunit   = filter_quota_adjust_qunit,
+        .quota_pending_commit = filter_quota_pending_commit,
 };
+#endif
 #endif /* __KERNEL__ */
 
 quota_interface_t mdc_quota_interface = {
@@ -649,21 +922,40 @@ quota_interface_t osc_quota_interface = {
         .quota_chkdq    = osc_quota_chkdq,
         .quota_setdq    = osc_quota_setdq,
         .quota_cleanup  = osc_quota_cleanup,
+        .quota_adjust_qunit = client_quota_adjust_qunit,
 };
 
 quota_interface_t lov_quota_interface = {
         .quota_check    = lov_quota_check,
         .quota_ctl      = lov_quota_ctl,
+        .quota_adjust_qunit = lov_quota_adjust_qunit, /* the osc and filter interfaces fill this same hook */
 };
 
 #ifdef __KERNEL__
+
+cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL; /* registered in init_lustre_quota(), removed in exit_lustre_quota() */
+
 static int __init init_lustre_quota(void)
 {
-        int rc = qunit_cache_init();
+#ifdef HAVE_QUOTA_SUPPORT /* proc dir, qunit cache and mds/filter interfaces are set up only with quota support */
+        int rc = 0;
+
+        lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
+                                                proc_lustre_root,
+                                                NULL, NULL);
+        if (IS_ERR(lquota_type_proc_dir)) {
+                CERROR("LProcFS failed in lquota-init\n");
+                rc = PTR_ERR(lquota_type_proc_dir);
+                return rc;
+        }
+
+        rc = qunit_cache_init(); /* NOTE(review): on failure the proc dir registered above is not removed before returning */
         if (rc)
                 return rc;
+
         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
+#endif /* the mdc/osc/lov interfaces below are registered unconditionally */
         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
@@ -672,23 +964,30 @@ static int __init init_lustre_quota(void)
 
 static void /*__exit*/ exit_lustre_quota(void)
 {
-        PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
-        PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
         PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
         PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
         PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
+#ifdef HAVE_QUOTA_SUPPORT /* undoes the HAVE_QUOTA_SUPPORT section of init_lustre_quota() */
+        PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
+        PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
 
         qunit_cache_cleanup();
+
+        if (lquota_type_proc_dir) /* file-scope pointer, initialised to NULL */
+                lprocfs_remove(&lquota_type_proc_dir);
+#endif
 }
 
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
 MODULE_DESCRIPTION("Lustre Quota");
 MODULE_LICENSE("GPL");
 
 cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);
 
+#ifdef HAVE_QUOTA_SUPPORT /* mds/filter interfaces are only defined under HAVE_QUOTA_SUPPORT */
 EXPORT_SYMBOL(mds_quota_interface);
 EXPORT_SYMBOL(filter_quota_interface);
+#endif
 EXPORT_SYMBOL(mdc_quota_interface);
 EXPORT_SYMBOL(osc_quota_interface);
 EXPORT_SYMBOL(lov_quota_interface);