Whamcloud - gitweb
LU-15880 quota: fix insane grant quota 81/48981/11
authorHongchao Zhang <hongchao@whamcloud.com>
Mon, 16 Jan 2023 02:21:09 +0000 (21:21 -0500)
committerOleg Drokin <green@whamcloud.com>
Tue, 31 Jan 2023 02:34:12 +0000 (02:34 +0000)
Fix the insane grant value in the quota master/slave index.
The logs often contain messages similar to the following:

LustreError: 39815:0:(qmt_handler.c:527:qmt_dqacq0())
$$$ Release too much! uuid:work-MDT0000-lwp-MDT0002_UUID
release:18446744070274413724 granted:18446744070291193856,
total:4118877744 qmt:work-QMT0000 pool:0-dt id:40212 enforced:1
hard:128849018880 soft:12884901888 granted:4118877744 time:0
qunit: 16777216 edquot:0 may_rel:0 revoke:0 default:no

This can be caused by chgrp, which reserves quota before changing
the GID of a file at the MDT, then releases the reserved quota after
the file GID has been changed on the corresponding OST (this issue
is tracked in LU-5152 and LU-11303).

In some cases, quota could be released even though it was not
reserved correctly, which causes the granted quota to become
negative. Because the type of grant is "__u64", the negative value
is treated as an insanely large value; the normal grant release
then fails, and the grant field of the quota ID in the quota files
(both at QMT and QSD) contains an insane value that can't be reset.

This patch resets the affected quota by clearing the quota limits and
grant. The grant will be reported by each QSD when the quota ID
is enforced again, and the grant at QMT is then rebuilt.

Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Change-Id: I083afa3b6648db5a1ccca0235667da022ff27e65
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48981
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sergey Cheremencev <scherementsev@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
14 files changed:
lustre/doc/lfs-setquota.1
lustre/include/obd_support.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/dir.c
lustre/mdt/mdt_handler.c
lustre/quota/lquota_internal.h
lustre/quota/qmt_entry.c
lustre/quota/qmt_handler.c
lustre/quota/qmt_lock.c
lustre/quota/qsd_entry.c
lustre/quota/qsd_handler.c
lustre/quota/qsd_writeback.c
lustre/tests/sanity-quota.sh
lustre/utils/lfs.c

index 5b07f63..ff3dbd9 100644 (file)
@@ -25,6 +25,8 @@ lfs setquota \- set quota limits or grace time for users, groups or projects.
 .TP
 .BR "lfs setquota " { -u | --user | -g | --group | -p | --projid "} " \fIUID\fR|\fIGID\fR|\fIPROJID\fR
        [\fB--delete\fR] <\fIfilesystem\fR>
+.BR "lfs setquota " { -u | --user | -g | --group | -p | --projid "} " \fIUID\fR|\fIGID\fR|\fIPROJID\fR
+       [\fB-r\fR] <\fIfilesystem\fR>
 .TP
 .SH DESCRIPTION
 .TP
@@ -69,6 +71,10 @@ Set project quota for \fIPROJID\fR.
 .B --pool \fIPOOL_NAME\fR
 Set quota per OST pool \fIPOOL_NAME\fR.
 .TP
+.BR -r
+Reset the internal quota data of the given user, group or project. It can be
+used to repair a quota ID whose internal data (such as the quota grant) is corrupted.
+.TP
 .BR -u | --user \fIUSERNAME\fR|\fBUID
 Set user quota for \fIUNAME\fR or \fIUID\fR.
 .PP
@@ -157,6 +163,9 @@ Command deletes the unused UID|GID|PROJID from Quota settings.
 .TP
 .B $ lfs setquota -u bob --delete /mnt/lustre
 Delete unused user 'bob'.
+.TP
+.B $ lfs setquota -u bob -r /mnt/lustre
+Reset the internal quota data of user 'bob'.
 .SH SEE ALSO
 .BR lfs (1),
 .BR lfs-quota(1)
index d765048..12c8747 100644 (file)
@@ -549,6 +549,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_QUOTA_INIT              0xA05
 #define OBD_FAIL_QUOTA_PREACQ            0xA06
 #define OBD_FAIL_QUOTA_RECALC            0xA07
+#define OBD_FAIL_QUOTA_GRANT             0xA08
 
 #define OBD_FAIL_LPROC_REMOVE            0xB00
 
index 36e1e70..391b8c9 100644 (file)
@@ -1342,6 +1342,7 @@ static inline __u64 toqb(__kernel_size_t space)
 #define LUSTRE_Q_GETDEFAULT_POOL       0x800013  /* get default pool quota*/
 #define LUSTRE_Q_SETDEFAULT_POOL       0x800014  /* set default pool quota */
 #define LUSTRE_Q_DELETEQID     0x800015  /* delete quota ID */
+#define LUSTRE_Q_RESETQID      0x800016  /* reset quota ID */
 /* In the current Lustre implementation, the grace time is either the time
  * or the timestamp to be used after some quota ID exceeds the soft limt,
  * 48 bits should be enough, its high 16 bits can be used as quota flags.
@@ -1367,6 +1368,7 @@ static inline __u64 toqb(__kernel_size_t space)
  * */
 #define LQUOTA_FLAG_DEFAULT    0x0001
 #define LQUOTA_FLAG_DELETED    0x0002
+#define LQUOTA_FLAG_RESET      0x0004
 
 #define LUSTRE_Q_CMD_IS_POOL(cmd)              \
        (cmd == LUSTRE_Q_GETQUOTAPOOL ||        \
index 869fc9f..7aa9947 100644 (file)
@@ -1203,6 +1203,7 @@ int quotactl_ioctl(struct super_block *sb, struct if_quotactl *qctl)
        case LUSTRE_Q_SETINFOPOOL:
        case LUSTRE_Q_SETDEFAULT_POOL:
        case LUSTRE_Q_DELETEQID:
+       case LUSTRE_Q_RESETQID:
                if (!capable(CAP_SYS_ADMIN))
                        RETURN(-EPERM);
 
index 692731b..19e255a 100644 (file)
@@ -3208,6 +3208,7 @@ static int mdt_quotactl(struct tgt_session_info *tsi)
        case LUSTRE_Q_SETINFOPOOL:
        case LUSTRE_Q_SETDEFAULT_POOL:
        case LUSTRE_Q_DELETEQID:
+       case LUSTRE_Q_RESETQID:
                if (!nodemap_can_setquota(nodemap, oqctl->qc_type,
                                          oqctl->qc_id))
                        GOTO(out_nodemap, rc = -EPERM);
@@ -3279,6 +3280,7 @@ static int mdt_quotactl(struct tgt_session_info *tsi)
        case LUSTRE_Q_SETDEFAULT_POOL:
        case LUSTRE_Q_GETDEFAULT_POOL:
        case LUSTRE_Q_DELETEQID:
+       case LUSTRE_Q_RESETQID:
                /* forward quotactl request to QMT */
                rc = qmt_hdls.qmth_quotactl(tsi->tsi_env, qmt, oqctl);
                break;
index c03020b..e4a481c 100644 (file)
@@ -188,7 +188,8 @@ struct lquota_entry {
                        lqe_nopreacq:1,   /* pre-acquire disabled */
                        lqe_is_default:1, /* the default quota is used */
                        lqe_is_global:1,  /* lqe belongs to global pool "0x0"*/
-                       lqe_is_deleted:1; /* lqe will be deleted soon */
+                       lqe_is_deleted:1, /* lqe will be deleted soon */
+                       lqe_is_reset:1;   /* lqe has been reset */
 
        /* the lock to protect lqe_glbl_data */
        struct mutex             lqe_glbl_data_lock;
index 92d83b6..ee55585 100644 (file)
@@ -136,6 +136,9 @@ static int qmt_lqe_read(const struct lu_env *env, struct lquota_entry *lqe,
                    (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) &
                     LQUOTA_FLAG_DEFAULT))
                        qmt_lqe_set_default(env, pool, lqe, false);
+               else if (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) &
+                                                       LQUOTA_FLAG_RESET)
+                       lqe->lqe_is_reset = true;
                break;
        default:
                LQUOTA_ERROR(lqe, "failed to read quota entry from disk, rc:%d",
@@ -356,6 +359,12 @@ int qmt_glb_write(const struct lu_env *env, struct thandle *th,
                rec->qbr_softlimit = 0;
                rec->qbr_time      = LQUOTA_GRACE_FLAG(lqe->lqe_gracetime,
                                                       LQUOTA_FLAG_DEFAULT);
+       } else if (lqe->lqe_is_reset) {
+               rec->qbr_hardlimit = 0;
+               rec->qbr_softlimit = 0;
+               rec->qbr_granted = 0;
+               rec->qbr_time      = LQUOTA_GRACE_FLAG(lqe->lqe_gracetime,
+                                                      LQUOTA_FLAG_RESET);
        } else {
                rec->qbr_hardlimit = lqe->lqe_hardlimit;
                rec->qbr_softlimit = lqe->lqe_softlimit;
index 6ef8377..3b54192 100644 (file)
@@ -344,6 +344,7 @@ static int qmt_set(const struct lu_env *env, struct qmt_device *qmt,
                        RETURN(PTR_ERR(lqe));
 
        lqe->lqe_is_deleted = 0;
+       lqe->lqe_is_reset = 0;
        rc = qmt_set_with_lqe(env, qmt, lqe, hard, soft, time, valid,
                              is_default, is_updated);
        if (rc == 0)
@@ -414,6 +415,133 @@ out:
        RETURN(rc);
 }
 
+/*
+ * Callback run on a slave index while resetting a quota ID: write a zero
+ * grant record for the quota ID into that slave index.
+ *
+ * \param env      - is the environment passed by the caller
+ * \param glb_fid  - is the FID of the global index (not used here)
+ * \param slv_name - is the slave index name, only used in error messages
+ * \param slv_fid  - is the FID of the slave index object to update
+ * \param arg      - is the quota master target (struct qmt_device *)
+ *
+ * The quota ID is read from qti->qti_id and the transaction is started on
+ * qti_lqes(env)[0], so the caller must have set both before iterating.
+ *
+ * \retval 0 always; failures are only logged and not returned to the caller
+ */
+static int qmt_reset_slv_cb(const struct lu_env *env, struct lu_fid *glb_fid,
+                           char *slv_name, struct lu_fid *slv_fid, void *arg)
+{
+       struct qmt_device *qmt = (struct qmt_device *)arg;
+       struct qmt_thread_info *qti = qmt_info(env);
+       struct dt_object *slv_obj = NULL;
+       struct lquota_slv_rec rec;
+       struct thandle *th = NULL;
+       int rc;
+
+       slv_obj = dt_locate(env, qmt->qmt_child, slv_fid);
+       if (IS_ERR(slv_obj))
+               GOTO(out, rc = PTR_ERR(slv_obj));
+
+       /* set up the index operations if the object has none yet */
+       if (slv_obj->do_index_ops == NULL) {
+               rc = slv_obj->do_ops->do_index_try(env, slv_obj,
+                                                  &dt_quota_slv_features);
+               if (rc) {
+                       CERROR("%s: fail to setup slave idx for %s: rc = %d\n",
+                              qmt->qmt_child->dd_lu_dev.ld_obd->obd_name,
+                              slv_name, rc);
+                       GOTO(out, rc);
+               }
+       }
+
+       th = qmt_trans_start(env, qti_lqes(env)[0]);
+       if (IS_ERR(th))
+               GOTO(out, rc = PTR_ERR(th));
+
+       rec.qsr_granted = 0;
+       rc = lquota_disk_write(env, th, slv_obj, &qti->qti_id,
+                              (struct dt_rec *)&rec, 0, NULL);
+       if (rc)
+               CERROR("%s: failed to reset slave grant for %s: rc = %d\n",
+                      qmt->qmt_child->dd_lu_dev.ld_obd->obd_name, slv_name,
+                      rc);
+out:
+       if (!IS_ERR_OR_NULL(th))
+               dt_trans_stop(env, qmt->qmt_child, th);
+
+       /* slv_obj holds an error pointer, not NULL, when dt_locate() failed */
+       if (!IS_ERR_OR_NULL(slv_obj))
+               dt_object_put(env, slv_obj);
+       return 0;
+}
+
+/*
+ * Reset the quota of the quota ID, it will reset the soft/hard limit and grant
+ *
+ * The in-memory entry is cleared and marked as reset, the global record is
+ * written with a bumped version, the grant record in each slave index is
+ * zeroed via qmt_reset_slv_cb(), and qmt_glb_lock_notify() is then called
+ * with the new version.
+ *
+ * \param env        - is the environment passed by the caller
+ * \param qmt        - is the quota master target
+ * \param restype    - is the pool type, either block (i.e. LQUOTA_RES_DT) or
+ *                     inode (i.e. LQUOTA_RES_MD)
+ * \param qtype      - is the quota type
+ * \param qid        - is the quota identifier for which we want to reset its
+ *                     quota settings.
+ *
+ * \retval 0 on success, negative errno on failure
+ */
+static int qmt_reset_qid(const struct lu_env *env, struct qmt_device *qmt,
+                        __u8 restype, __u8 qtype, __u64 qid)
+{
+       struct qmt_thread_info *qti = qmt_info(env);
+       union lquota_id *quota_id = &qti->qti_id;
+       struct qmt_pool_info *qpi = NULL;
+       struct lquota_entry *lqe = NULL;
+       struct thandle *th = NULL;
+       __u64 softlimit = 0, hardlimit = 0;
+       __u64 ver = 0;
+       int rc;
+
+       ENTRY;
+
+       quota_id->qid_uid = qid;
+       lqe = qmt_pool_lqe_lookup(env, qmt, restype, qtype, quota_id, NULL);
+       if (IS_ERR(lqe))
+               RETURN(PTR_ERR(lqe));
+
+       lqe_write_lock(lqe);
+
+       qpi = qmt_pool_lookup_glb(env, qmt, restype);
+       if (IS_ERR(qpi))
+               /* report the real lookup error rather than a fixed -ENOMEM */
+               GOTO(out, rc = PTR_ERR(qpi));
+
+       th = qmt_trans_start(env, lqe);
+       if (IS_ERR(th))
+               GOTO(out, rc = PTR_ERR(th));
+
+       /* remember the limits so they can be put back if the write fails */
+       softlimit = lqe->lqe_softlimit;
+       hardlimit = lqe->lqe_hardlimit;
+
+       lqe->lqe_softlimit = 0;
+       lqe->lqe_hardlimit = 0;
+       lqe->lqe_granted = 0;
+       lqe->lqe_edquot = 0;
+       lqe->lqe_qunit = 0;
+       lqe->lqe_is_default = 0;
+       lqe->lqe_is_deleted = 0;
+       lqe->lqe_is_reset = 1;
+       rc = qmt_glb_write(env, th, lqe, LQUOTA_BUMP_VER, &ver);
+       if (rc)
+               LQUOTA_ERROR(lqe, "failed to write quota global rec, rc:%d",
+                            rc);
+       dt_trans_stop(env, qmt->qmt_child, th);
+       if (rc)
+               GOTO(out, rc);
+
+       /* the slave callback reads the ID from qti_id and the lqe from here */
+       lquota_generate_fid(&qti->qti_fid, restype, qtype);
+       qti_lqes(env)[0] = lqe;
+       lquota_disk_for_each_slv(env, qpi->qpi_root, &qti->qti_fid,
+                                qmt_reset_slv_cb, qmt);
+
+       qmt_glb_lock_notify(env, lqe, ver);
+
+out:
+       if (rc) {
+               /*
+                * NOTE(review): only the limits and the reset flag are rolled
+                * back; granted/edquot/qunit/is_default stay cleared - confirm
+                * this is intended.
+                */
+               if (softlimit != 0)
+                       lqe->lqe_softlimit = softlimit;
+               if (hardlimit != 0)
+                       lqe->lqe_hardlimit = hardlimit;
+               lqe->lqe_is_reset = 0;
+       }
+
+       if (!IS_ERR_OR_NULL(qpi))
+               qpi_putref(env, qpi);
+
+       lqe_write_unlock(lqe);
+       lqe_putref(lqe);
+
+       RETURN(rc);
+}
 /*
  * Handle quotactl request.
  *
@@ -565,6 +693,41 @@ static int qmt_quotactl(const struct lu_env *env, struct lu_device *ld,
                                    oqctl->qc_id);
                break;
 
+       case LUSTRE_Q_RESETQID:
+               if (oqctl->qc_id == 0)
+                       RETURN(-EINVAL);
+
+               id->qid_uid = oqctl->qc_id;
+               /* save the quota setting before resetting */
+               rc = qmt_get(env, qmt, LQUOTA_RES_MD, oqctl->qc_type, id,
+                            &dqb->dqb_ihardlimit, &dqb->dqb_isoftlimit,
+                            &dqb->dqb_itime, false, NULL);
+               if (rc)
+                       break;
+               else
+                       dqb->dqb_valid |= QIF_ILIMITS | QIF_ITIME;
+
+               rc = qmt_get(env, qmt, LQUOTA_RES_DT, oqctl->qc_type, id,
+                            &dqb->dqb_bhardlimit, &dqb->dqb_bsoftlimit,
+                            &dqb->dqb_btime, false, NULL);
+               if (rc)
+                       break;
+
+               dqb->dqb_valid |= QIF_BLIMITS | QIF_BTIME;
+               dqb->dqb_curinodes = 0;
+               dqb->dqb_curspace = 0;
+
+               /* reset the corresponding quota ID */
+               rc = qmt_reset_qid(env, qmt, LQUOTA_RES_MD, oqctl->qc_type,
+                                  oqctl->qc_id);
+               if (rc)
+                       break;
+
+               rc = qmt_reset_qid(env, qmt, LQUOTA_RES_DT, oqctl->qc_type,
+                                  oqctl->qc_id);
+               break;
+
+
        default:
                CERROR("%s: unsupported quotactl command: %d\n",
                       qmt->qmt_svname, oqctl->qc_cmd);
@@ -601,14 +764,22 @@ static inline bool qmt_lqes_can_rel(const struct lu_env *env, __u64 cnt)
        return can_release;
 }
 
-static inline void qmt_rel_lqes(const struct lu_env *env, __u64 *slv, __u64 cnt)
+static inline void qmt_rel_lqes(const struct lu_env *env, __u64 *slv, __u64 cnt,
+                               bool reset)
 {
        int i;
 
-       for (i = 0; i < qti_lqes_cnt(env); i++)
-               qti_lqe_granted(env, i) -= cnt;
+       for (i = 0; i < qti_lqes_cnt(env); i++) {
+               if (reset)
+                       qti_lqe_granted(env, i) = 0;
+               else
+                       qti_lqe_granted(env, i) -= cnt;
+       }
 
-       *slv -= cnt;
+       if (reset)
+               *slv = 0;
+       else
+               *slv -= cnt;
 }
 
 static inline bool qmt_lqes_cannot_grant(const struct lu_env *env, __u64 cnt)
@@ -718,7 +889,7 @@ int qmt_dqacq0(const struct lu_env *env, struct qmt_device *qmt,
               struct obd_uuid *uuid, __u32 qb_flags, __u64 qb_count,
               __u64 qb_usage, struct quota_body *repbody)
 {
-       __u64                    now, count;
+       __u64                    now, count = 0;
        struct dt_object        *slv_obj = NULL;
        __u64                    slv_granted, slv_granted_bck;
        struct thandle          *th = NULL;
@@ -778,6 +949,13 @@ int qmt_dqacq0(const struct lu_env *env, struct qmt_device *qmt,
        if (req_is_acq(qb_flags) && qb_count == 0)
                GOTO(out_locked, rc = 0);
 
+       if (lqe->lqe_is_reset) {
+               lqe->lqe_granted = 0;
+               repbody->qb_count = qb_count;
+               qmt_rel_lqes(env, &slv_granted, qb_count, lqe->lqe_is_reset);
+               GOTO(out_locked, rc = 0);
+       }
+
        /* fetch how much quota space is already granted to this slave */
        rc = qmt_slv_read(env, &lqe->lqe_id, slv_obj, &slv_granted);
        if (rc) {
@@ -806,7 +984,7 @@ int qmt_dqacq0(const struct lu_env *env, struct qmt_device *qmt,
 
                repbody->qb_count = qb_count;
                /* put released space back to global pool */
-               qmt_rel_lqes(env, &slv_granted, qb_count);
+               qmt_rel_lqes(env, &slv_granted, qb_count, lqe->lqe_is_reset);
                GOTO(out_write, rc = 0);
        }
 
@@ -881,6 +1059,9 @@ out_write:
        /* start/stop grace timer if required */
        qmt_lqes_tune_grace(env, now);
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_GRANT))
+               slv_granted = 0xFFFFFFFFFFF00000;
+
        /* Update slave index first since it is easier to roll back */
        ret = qmt_slv_write(env, th, lqe, slv_obj, LQUOTA_BUMP_VER,
                            &repbody->qb_slv_ver, slv_granted);
index 5ee05c2..ec8a345 100644 (file)
@@ -801,6 +801,11 @@ void qmt_glb_lock_notify(const struct lu_env *env, struct lquota_entry *lqe,
                qti->qti_gl_desc.lquota_desc.gl_softlimit = 0;
                qti->qti_gl_desc.lquota_desc.gl_time = LQUOTA_GRACE_FLAG(0,
                                                        LQUOTA_FLAG_DELETED);
+       } else if (lqe->lqe_is_reset) {
+               qti->qti_gl_desc.lquota_desc.gl_hardlimit = lqe->lqe_hardlimit;
+               qti->qti_gl_desc.lquota_desc.gl_softlimit = lqe->lqe_softlimit;
+               qti->qti_gl_desc.lquota_desc.gl_time = LQUOTA_GRACE_FLAG(0,
+                                                       LQUOTA_FLAG_RESET);
        } else {
                qti->qti_gl_desc.lquota_desc.gl_hardlimit = lqe->lqe_hardlimit;
                qti->qti_gl_desc.lquota_desc.gl_softlimit = lqe->lqe_softlimit;
index 12b877d..625f122 100644 (file)
@@ -93,6 +93,11 @@ static int qsd_lqe_read(const struct lu_env *env, struct lquota_entry *lqe,
                }
 
                if (lqe->lqe_id.qid_uid != 0 &&
+                   (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) &
+                                               LQUOTA_FLAG_RESET))
+                       lqe->lqe_is_reset = true;
+
+               if (lqe->lqe_id.qid_uid != 0 &&
                    (qti->qti_glb_rec.qbr_hardlimit != 0 ||
                     qti->qti_glb_rec.qbr_softlimit != 0))
                        lqe->lqe_enforced = true;
index 1602e59..b12d495 100644 (file)
@@ -352,7 +352,9 @@ static void qsd_req_completion(const struct lu_env *env,
        if (repbody != NULL && repbody->qb_count != 0) {
                LQUOTA_DEBUG(lqe, "DQACQ qb_count:%llu", repbody->qb_count);
 
-               if (req_is_rel(reqbody->qb_flags)) {
+               if (lqe->lqe_is_reset) {
+                       lqe->lqe_granted = 0;
+               } else if (req_is_rel(reqbody->qb_flags)) {
                        if (lqe->lqe_granted < repbody->qb_count) {
                                LQUOTA_ERROR(lqe, "can't release more space "
                                             "than owned %llu<%llu",
@@ -379,6 +381,9 @@ static void qsd_req_completion(const struct lu_env *env,
                 * We don't update the version of slave index copy on DQACQ.
                 * No locking is necessary since nobody can change
                 * lqe->lqe_granted while lqe->lqe_pending_req > 0 */
+               if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_GRANT))
+                       qti->qti_rec.lqr_slv_rec.qsr_granted =
+                                                       0xFFFFFFFFFFDEC80CULL;
                qsd_upd_schedule(qqi, lqe, &lqe->lqe_id, &qti->qti_rec, 0,
                                 false);
                lqe_write_lock(lqe);
index 9f36287..b322d36 100644 (file)
@@ -332,6 +332,7 @@ out_del:
                        GOTO(out, rc = PTR_ERR(lqe));
        }
 
+       lqe->lqe_is_reset = false;
        lqe->lqe_is_deleted = 0;
 
        /* The in-memory lqe update for slave index copy isn't deferred,
@@ -370,6 +371,20 @@ out:
 
                LQUOTA_DEBUG(lqe, "update to use default quota");
        }
+       if (upd->qur_global && rc == 0 &&
+           (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
+                                                       LQUOTA_FLAG_RESET)) {
+               struct lquota_slv_rec srec;
+
+               lqe->lqe_granted = 0;
+               lqe->lqe_softlimit = 0;
+               lqe->lqe_hardlimit = 0;
+               lqe->lqe_is_default = false;
+               lqe->lqe_is_reset = true;
+
+               memset(&srec, 0, sizeof(srec));
+               rc = qsd_update_index(env, qqi, &upd->qur_qid, false, 0, &srec);
+       }
 
        if (lqe && !IS_ERR(lqe)) {
                lqe_putref(lqe);
index 7431d66..c2fc789 100755 (executable)
@@ -5644,6 +5644,109 @@ test_83()
 }
 run_test 83 "Setting default quota shouldn't affect grace time"
 
+# Inject an insane grant value through OBD_FAIL_QUOTA_GRANT, then check that
+# "lfs setquota -r" clears it and that quota enforcement still works afterwards.
+test_84()
+{
+       (( $MDS1_VERSION >= $(version_code 2.15.53) )) ||
+               skip "need MDS 2.15.53 or later"
+
+       local dir1="$DIR/$tdir/dir1"
+       local TESTFILE1="$dir1/$tfile-1"
+       local waited=0
+       local grant=0
+       local grant2=0
+       local qp="qpool1"
+
+       mds_supports_qp
+
+       setup_quota_test || error "setup quota failed with $?"
+       quota_init
+       set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+       pool_add $qp || error "pool_add failed"
+       pool_add_targets $qp 0 $(($OSTCOUNT - 1)) ||
+               error "pool_add_targets failed"
+
+       $LFS setquota -g $TSTUSR -B 10G $DIR ||
+               error "failed to set group quota for $TSTUSR"
+       $LFS setquota -g $TSTUSR -B 5G --pool $qp $DIR ||
+               error "failed to set group pool quota for $TSTUSR"
+       $LFS quota -gv $TSTUSR $DIR
+
+       mkdir -p $dir1 || error "failed to mkdir"
+       chown $TSTUSR.$TSTUSR $dir1 || error "chown $dir1 failed"
+
+       $LFS setstripe -c 1 -i 0 $TESTFILE1
+       $LFS getstripe $TESTFILE1
+       chown $TSTUSR.$TSTUSR $TESTFILE1
+
+       $RUNAS $DD of=$TESTFILE1 count=60 conv=nocreat oflag=direct ||
+               quota_error g $TSTUSR "write failed"
+
+       sync
+       sleep 3
+       $LFS quota -gv $TSTUSR $DIR
+
+#define OBD_FAIL_QUOTA_GRANT 0xA08
+       lustre_fail mds 0xa08
+       lustre_fail ost 0xa08
+       sleep 1
+
+       # clear quota limits to trigger updating grant quota
+       $LFS setquota -g $TSTUSR -b 0 -B 0 $DIR ||
+               error "failed to clear the group quota for $TSTUSR"
+       $LFS quota -gv $TSTUSR $DIR
+
+       # the grant quota should be set as insane value; a 20-character
+       # value is what this test treats as insane
+       waited=0
+       while (( $waited < 60 )); do
+               grant=$(getquota -g $TSTUSR lustre-OST0000_UUID bhardlimit $qp)
+               grant2=$(getquota -g $TSTUSR lustre-OST0000_UUID bhardlimit)
+               (( ${#grant} == 20 && ${#grant2} == 20 )) && break
+
+               sleep 1
+               waited=$((waited + 1))
+       done
+
+       (( ${#grant} == 20 )) || error "pool grant is not set as insane value"
+       (( ${#grant2} == 20 )) || error "grant is not set as insane value"
+
+       # stop the fault injection before resetting
+       lustre_fail mds_ost 0
+       sleep 1
+
+       # reset the quota
+       $LFS setquota -g $TSTUSR -r $DIR ||
+               error "failed to reset group quota for $TSTUSR"
+
+       sleep 3
+       $LFS quota -gv $TSTUSR $DIR
+
+       # the grant quota should be reset
+       grant=$(getquota -g $TSTUSR lustre-OST0000_UUID bhardlimit)
+       (( ${#grant} == 20 )) && error "grant is not cleared"
+       grant=$(getquota -g $TSTUSR lustre-OST0000_UUID bhardlimit $qp)
+       (( ${#grant} == 20 )) && error "pool grant is not cleared"
+
+       # the pool limit set above (5G) must survive the reset
+       $LFS quota -gv $TSTUSR --pool $qp $DIR
+       local hlimit=$(getquota -g $TSTUSR global bhardlimit $qp)
+       [ $hlimit -eq 5242880 ] || error "pool limit is changed"
+
+       # test whether the quota still works
+       $LFS setquota -g $TSTUSR -B 100M $DIR ||
+               error "failed to set group quota for $TSTUSR"
+       $LFS quota -gv $TSTUSR $DIR
+
+       $RUNAS $DD of=$TESTFILE1 count=200 conv=nocreat oflag=direct &&
+               quota_error g $TSTUSR "dd succeed, expect EDQUOT"
+
+       $LFS setquota -g $TSTUSR -B 300M $DIR ||
+               error "failed to set group quota for $TSTUSR"
+       $LFS quota -gv $TSTUSR $DIR
+
+       $RUNAS $DD of=$TESTFILE1 count=200 conv=nocreat oflag=direct ||
+               quota_error g $TSTUSR "dd failed, expect succeed"
+}
+run_test 84 "Reset quota should fix the insane granted quota"
+
 quota_fini()
 {
        do_nodes $(comma_list $(nodes_list)) \
index ee2f20b..51d6f6e 100644 (file)
@@ -7682,6 +7682,153 @@ quota_type:
        return 0;
 }
 
+/*
+ * Reset a quota ID and restore its previous limits afterwards.
+ *
+ * The reset request is sent first; it is expected to return the limits that
+ * were in effect. The function then polls every MDT, then every OST, about
+ * once per second until none of them reports a non-zero hard limit for the
+ * ID, and finally re-applies the saved limits with LUSTRE_Q_SETQUOTA.
+ *
+ * \param mnt   - is the mount point of the filesystem
+ * \param qctl  - is the quotactl request with qc_cmd LUSTRE_Q_RESETQID; on
+ *                return qc_dqblk holds the limits saved before the reset
+ *
+ * \retval 0 on success, non-zero error code otherwise (-ETIMEDOUT when the
+ *         targets did not finish resetting in time)
+ */
+static int lfs_reset_quota(char *mnt, struct if_quotactl *qctl)
+{
+       struct if_quotactl tmp_qctl;
+       int index, md_count, dt_count;
+       int wait_phase = 0, wait_index = 0, wait_count = 0;
+       int rc, rc2;
+
+       /* reset the quota ID, the existing quota setting will be returned */
+       rc = llapi_quotactl(mnt, qctl);
+       if (rc)
+               return rc;
+
+       /* sanity check */
+       if ((qctl->qc_dqblk.dqb_valid & QIF_LIMITS) != QIF_LIMITS) {
+               fprintf(stderr,
+                       "the existing quota settings are not returned!\n");
+               return -EINVAL;
+       }
+
+       rc = llapi_get_obd_count(mnt, &md_count, 1);
+       if (rc) {
+               fprintf(stderr, "can not get mdt count: %s\n", strerror(-rc));
+               return rc;
+       }
+
+       rc = llapi_get_obd_count(mnt, &dt_count, 0);
+       if (rc) {
+               fprintf(stderr, "can not get ost count: %s\n", strerror(-rc));
+               return rc;
+       }
+
+       memset(&tmp_qctl, 0, sizeof(tmp_qctl));
+       tmp_qctl.qc_type = qctl->qc_type;
+       tmp_qctl.qc_id = qctl->qc_id;
+       tmp_qctl.qc_cmd = LUSTRE_Q_GETQUOTA;
+
+retry:
+       /* phase 0 scans the MDTs, phase 1 the OSTs, resuming at wait_index */
+       if (wait_phase == 0) {
+               for (index = wait_index; index < md_count; index++) {
+                       tmp_qctl.qc_idx = index;
+                       tmp_qctl.qc_valid = QC_MDTIDX;
+                       rc = llapi_quotactl(mnt, &tmp_qctl);
+                       if (rc == -ENODEV || rc == -ENODATA)
+                               continue;
+                       if (rc) {
+                               fprintf(stderr, "quotactl mdt%d failed: %s\n",
+                                       index, strerror(-rc));
+                               break;
+                       }
+                       /* check whether the md quota grant is reset */
+                       if (tmp_qctl.qc_dqblk.dqb_valid & QIF_LIMITS &&
+                           tmp_qctl.qc_dqblk.dqb_ihardlimit != 0)
+                               break;
+               }
+
+               if (index < md_count) {
+                       wait_phase = 0;
+                       wait_index = index;
+                       goto wait;
+               }
+       } else {
+               for (index = wait_index; index < dt_count; index++) {
+                       tmp_qctl.qc_idx = index;
+                       tmp_qctl.qc_valid = QC_OSTIDX;
+                       rc = llapi_quotactl(mnt, &tmp_qctl);
+                       if (rc == -ENODEV || rc == -ENODATA)
+                               continue;
+                       if (rc) {
+                               fprintf(stderr, "quotactl ost%d failed: %s\n",
+                                       index, strerror(-rc));
+                               break;
+                       }
+                       /* check whether the dt quota grant is reset */
+                       if (tmp_qctl.qc_dqblk.dqb_valid & QIF_LIMITS &&
+                           tmp_qctl.qc_dqblk.dqb_bhardlimit != 0)
+                               break;
+               }
+
+               if (index < dt_count) {
+                       wait_phase = 1;
+                       wait_index = index;
+                       goto wait;
+               }
+       }
+
+       if (wait_phase == 0) {
+               /* all MDTs are done: start the OST scan from the first OST */
+               wait_phase = 1;
+               wait_index = 0;
+               goto retry;
+       }
+
+       /*
+        * The last target probed may have been skipped with -ENODEV or
+        * -ENODATA; that is not a failure of the reset.
+        */
+       rc = 0;
+       goto out;
+
+wait:
+       if (rc || wait_count > 30) {
+               fprintf(stderr, "fail to reset the quota ID %u on OBDs\n",
+                       qctl->qc_id);
+               /* giving up on a timeout must not be reported as success */
+               if (!rc)
+                       rc = -ETIMEDOUT;
+               goto out;
+       }
+
+       wait_count++;
+       sleep(1);
+       fprintf(stdout, "wait %d seconds for OBDs to reset the quota ID %u\n",
+               wait_count, qctl->qc_id);
+       goto retry;
+
+out:
+       /* restore the quota setting */
+       if (qctl->qc_dqblk.dqb_isoftlimit == 0 &&
+           qctl->qc_dqblk.dqb_ihardlimit == 0 &&
+           qctl->qc_dqblk.dqb_bsoftlimit == 0 &&
+           qctl->qc_dqblk.dqb_bhardlimit == 0)
+               return rc;
+
+       memcpy(&tmp_qctl, qctl, sizeof(tmp_qctl));
+       tmp_qctl.qc_cmd = LUSTRE_Q_SETQUOTA;
+       rc2 = llapi_quotactl(mnt, &tmp_qctl);
+       if (!rc2)
+               return rc;
+
+       /* print the command line the administrator can use to restore */
+       fprintf(stderr,
+               "fail to restore the quota setting: %s, please restore it manually by\n  lfs setquota %s %u",
+               strerror(-rc2),
+               qctl->qc_type == USRQUOTA ? "-u" :
+                               (qctl->qc_type == GRPQUOTA ? "-g" : "-p"),
+               qctl->qc_id);
+
+       if (qctl->qc_dqblk.dqb_isoftlimit != 0)
+               fprintf(stderr, " -i %llu",
+                       (unsigned long long)qctl->qc_dqblk.dqb_isoftlimit);
+       if (qctl->qc_dqblk.dqb_ihardlimit != 0)
+               fprintf(stderr, " -I %llu",
+                       (unsigned long long)qctl->qc_dqblk.dqb_ihardlimit);
+       if (qctl->qc_dqblk.dqb_bsoftlimit != 0)
+               fprintf(stderr, " -b %llu",
+                       (unsigned long long)qctl->qc_dqblk.dqb_bsoftlimit);
+       if (qctl->qc_dqblk.dqb_bhardlimit != 0)
+               fprintf(stderr, " -B %llu",
+                       (unsigned long long)qctl->qc_dqblk.dqb_bhardlimit);
+
+       fprintf(stderr, " %s\n", mnt);
+       if (!rc)
+               rc = rc2;
+
+       return rc;
+}
+
+
 #define BSLIMIT (1 << 0)
 #define BHLIMIT (1 << 1)
 #define ISLIMIT (1 << 2)
@@ -7710,6 +7857,7 @@ int lfs_setquota(int argc, char **argv)
                                                .has_arg = required_argument },
        { .val = 'p',   .name = "projid",       .has_arg = required_argument },
        { .val = 'P',   .name = "default-prj",  .has_arg = no_argument },
+       { .val = 'r',   .name = "reset",        .has_arg = no_argument },
        { .val = 'u',   .name = "user",         .has_arg = required_argument },
        { .val = 'U',   .name = "default-usr",  .has_arg = no_argument },
        { .val = LFS_POOL_OPT,
@@ -7738,7 +7886,7 @@ int lfs_setquota(int argc, char **argv)
                                   * so it can be used as a marker that qc_type
                                   * isn't reinitialized from command line
                                   */
-       while ((c = getopt_long(argc, argv, "b:B:dDg:Ghi:I:p:Pu:U",
+       while ((c = getopt_long(argc, argv, "b:B:dDg:Ghi:I:p:Pru:U",
                long_opts, NULL)) != -1) {
                switch (c) {
                case 'U':
@@ -7867,6 +8015,9 @@ quota_type_def:
                                                LUSTRE_Q_SETDEFAULT_POOL :
                                                LUSTRE_Q_SETQUOTAPOOL;
                        break;
+               case 'r':
+                       qctl->qc_cmd = LUSTRE_Q_RESETQID;
+                       break;
                default:
                        fprintf(stderr,
                                "%s setquota: unrecognized option '%s'\n",
@@ -7887,7 +8038,7 @@ quota_type_def:
        }
 
        if (!use_default && qctl->qc_cmd != LUSTRE_Q_DELETEQID &&
-           limit_mask == 0) {
+           qctl->qc_cmd != LUSTRE_Q_RESETQID && limit_mask == 0) {
                fprintf(stderr,
                        "%s setquota: at least one limit must be specified\n",
                        progname);
@@ -7895,10 +8046,10 @@ quota_type_def:
                goto out;
        }
 
-       if ((use_default || qctl->qc_cmd == LUSTRE_Q_DELETEQID)  &&
-           limit_mask != 0) {
+       if ((use_default || qctl->qc_cmd == LUSTRE_Q_DELETEQID ||
+            qctl->qc_cmd == LUSTRE_Q_RESETQID) && limit_mask != 0) {
                fprintf(stderr,
-                       "%s setquota: limits should not be specified when using default quota or deleting quota ID\n",
+                       "%s setquota: limits should not be specified when using default quota, deleting or resetting quota ID\n",
                        progname);
                rc = CMD_HELP;
                goto out;
@@ -7912,9 +8063,10 @@ quota_type_def:
                goto out;
        }
 
-       if (qctl->qc_cmd == LUSTRE_Q_DELETEQID  && qctl->qc_id == 0) {
+       if ((qctl->qc_cmd == LUSTRE_Q_DELETEQID ||
+            qctl->qc_cmd == LUSTRE_Q_RESETQID)  && qctl->qc_id == 0) {
                fprintf(stderr,
-                       "%s setquota: can not delete root user/group/project\n",
+                       "%s setquota: can not delete or reset root user/group/project\n",
                        progname);
                rc = CMD_HELP;
                goto out;
@@ -7991,7 +8143,11 @@ quota_type_def:
        dqb->dqb_valid |= (limit_mask & (BHLIMIT | BSLIMIT)) ? QIF_BLIMITS : 0;
        dqb->dqb_valid |= (limit_mask & (IHLIMIT | ISLIMIT)) ? QIF_ILIMITS : 0;
 
-       rc = llapi_quotactl(mnt, qctl);
+       if (qctl->qc_cmd == LUSTRE_Q_RESETQID)
+               rc = lfs_reset_quota(mnt, qctl);
+       else
+               rc = llapi_quotactl(mnt, qctl);
+
        if (rc) {
                if (*obd_type)
                        fprintf(stderr,