Whamcloud - gitweb
LU-6245 server: remove types abstraction from quota/target/nodemap code
[fs/lustre-release.git] / lustre / quota / qmt_entry.c
index ee57900..d0374cf 100644 (file)
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2012 Intel, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  * Use is subject to license terms.
  *
  * Author: Johann Lombardi <johann.lombardi@intel.com>
  * Author: Niu    Yawei    <yawei.niu@intel.com>
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
 #define DEBUG_SUBSYSTEM S_LQUOTA
 
 #include "qmt_internal.h"
@@ -47,7 +43,7 @@ static void qmt_lqe_init(struct lquota_entry *lqe, void *arg)
        LASSERT(lqe_is_master(lqe));
 
        lqe->lqe_revoke_time = 0;
-       cfs_init_rwsem(&lqe->lqe_sem);
+       init_rwsem(&lqe->lqe_sem);
 }
 
 /*
@@ -116,16 +112,16 @@ static void qmt_lqe_debug(struct lquota_entry *lqe, void *arg,
        struct qmt_pool_info    *pool = (struct qmt_pool_info *)arg;
 
        libcfs_debug_vmsg2(msgdata, fmt, args,
-                          "qmt:%s pool:%d-%s id:"LPU64" enforced:%d hard:"LPU64
-                          " soft:"LPU64" granted:"LPU64" time:"LPU64" qunit:"
-                          LPU64" edquot:%d revoke:"LPU64"\n",
+                          "qmt:%s pool:%d-%s id:%llu enforced:%d hard:%llu"
+                          " soft:%llu granted:%llu time:%llu qunit:"
+                          "%llu edquot:%d may_rel:%llu revoke:%llu\n",
                           pool->qpi_qmt->qmt_svname,
                           pool->qpi_key & 0x0000ffff,
                           RES_NAME(pool->qpi_key >> 16),
                           lqe->lqe_id.qid_uid, lqe->lqe_enforced,
                           lqe->lqe_hardlimit, lqe->lqe_softlimit,
                           lqe->lqe_granted, lqe->lqe_gracetime,
-                          lqe->lqe_qunit, lqe->lqe_edquot,
+                          lqe->lqe_qunit, lqe->lqe_edquot, lqe->lqe_may_rel,
                           lqe->lqe_revoke_time);
 }
 
@@ -261,21 +257,16 @@ int qmt_glb_write(const struct lu_env *env, struct thandle *th,
 
        LQUOTA_DEBUG(lqe, "write glb");
 
-       if (!lqe->lqe_enforced && lqe->lqe_granted == 0 &&
-           lqe->lqe_id.qid_uid != 0) {
-               /* quota isn't enforced any more for this entry and there is no
-                * more space granted to slaves, let's just remove the entry
-                * from the index */
-               rec = NULL;
-       } else {
-               rec = &qti->qti_glb_rec;
+       /* never delete the entry even when the id isn't enforced and
+        * no quota is granted, otherwise this entry will not be
+        * synced to the slave during reintegration. */
+       rec = &qti->qti_glb_rec;
 
-               /* fill global index with updated quota settings */
-               rec->qbr_granted   = lqe->lqe_granted;
-               rec->qbr_hardlimit = lqe->lqe_hardlimit;
-               rec->qbr_softlimit = lqe->lqe_softlimit;
-               rec->qbr_time      = lqe->lqe_gracetime;
-       }
+       /* fill global index with updated quota settings */
+       rec->qbr_granted   = lqe->lqe_granted;
+       rec->qbr_hardlimit = lqe->lqe_hardlimit;
+       rec->qbr_softlimit = lqe->lqe_softlimit;
+       rec->qbr_time      = lqe->lqe_gracetime;
 
        /* write new quota settings */
        rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
@@ -336,7 +327,7 @@ int qmt_slv_read(const struct lu_env *env, struct lquota_entry *lqe,
                RETURN(rc);
        }
 
-       LQUOTA_DEBUG(lqe, "successful slv read "LPU64, *granted);
+       LQUOTA_DEBUG(lqe, "successful slv read %llu", *granted);
 
        RETURN(0);
 }
@@ -369,26 +360,22 @@ int qmt_slv_write(const struct lu_env *env, struct thandle *th,
        LASSERT(lqe_is_master(lqe));
        LASSERT(lqe_is_locked(lqe));
 
-       LQUOTA_DEBUG(lqe, "write slv "DFID" granted:"LPU64,
+       LQUOTA_DEBUG(lqe, "write slv "DFID" granted:%llu",
                     PFID(lu_object_fid(&slv_obj->do_lu)), granted);
 
-       if (granted == 0) {
-               /* this slave does not own any quota space for this ID any more,
-                * so let's just remove the entry from the index */
-               rec = NULL;
-       } else {
-               rec = &qti->qti_slv_rec;
+       /* never delete the entry, otherwise it will not be transferred
+        * to the slave during reintegration. */
+       rec = &qti->qti_slv_rec;
 
-               /* updated space granted to this slave */
-               rec->qsr_granted = granted;
-       }
+       /* updated space granted to this slave */
+       rec->qsr_granted = granted;
 
        /* write new granted space */
        rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
                               (struct dt_rec *)rec, flags, ver);
        if (rc) {
                LQUOTA_ERROR(lqe, "failed to update slave index "DFID" granted:"
-                            LPU64, PFID(lu_object_fid(&slv_obj->do_lu)),
+                            "%llu", PFID(lu_object_fid(&slv_obj->do_lu)),
                             granted);
                RETURN(rc);
        }
@@ -412,3 +399,297 @@ int qmt_validate_limits(struct lquota_entry *lqe, __u64 hard, __u64 soft)
                RETURN(-EINVAL);
        RETURN(0);
 }
+
+/*
+ * Set/clear edquot flag after quota space allocation/release or settings
+ * change. Slaves will be notified of changes via glimpse on per-ID lock.
+ *
+ * \param lqe - is the quota entry to check
+ * \param now - is the current time in seconds, used for grace time management
+ */
+void qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now)
+{
+       struct qmt_pool_info    *pool = lqe2qpi(lqe);
+       ENTRY;
+
+       /* unenforced ids and id 0 never carry the edquot flag */
+       if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
+               RETURN_EXIT;
+
+       if (!lqe->lqe_edquot) {
+               /* space exhausted flag not set, let's check whether it is time
+                * to set the flag */
+
+               if (!qmt_space_exhausted(lqe, now))
+                       /* the qmt still has available space */
+                       RETURN_EXIT;
+
+               /* See comment in qmt_adjust_qunit(). LU-4139 */
+               if (qmt_hard_exhausted(lqe) ||
+                   pool->qpi_key >> 16 != LQUOTA_RES_DT) {
+                       /* we haven't reached the minimal qunit yet so there is
+                        * still hope that the rebalancing process might free
+                        * up some quota space */
+                       if (lqe->lqe_qunit != pool->qpi_least_qunit)
+                               RETURN_EXIT;
+
+                       /* least qunit value not sent to all slaves yet */
+                       if (lqe->lqe_revoke_time == 0)
+                               RETURN_EXIT;
+
+                       /* Let's give more time to slave to release space */
+                       if (lqe->lqe_may_rel != 0 &&
+                           cfs_time_before_64(cfs_time_shift_64(
+                                                       -QMT_REBA_TIMEOUT),
+                                              lqe->lqe_revoke_time))
+                               RETURN_EXIT;
+               } else {
+                       /* block quota over the soft limit only: wait until
+                        * qunit has shrunk to the soft least qunit before
+                        * setting the flag (see LU-4139) */
+                       if (lqe->lqe_qunit > pool->qpi_soft_least_qunit)
+                               RETURN_EXIT;
+               }
+
+               /* set edquot flag */
+               lqe->lqe_edquot = true;
+       } else {
+               /* space exhausted flag set, let's check whether it is time to
+                * clear it */
+
+               if (qmt_space_exhausted(lqe, now))
+                       /* the qmt still has no space available */
+                       RETURN_EXIT;
+
+               if (lqe->lqe_hardlimit != 0 &&
+                   lqe->lqe_granted + pool->qpi_least_qunit >
+                                                       lqe->lqe_hardlimit)
+                       /* we clear the flag only once at least one least qunit
+                        * is available */
+                       RETURN_EXIT;
+
+               /* clear edquot flag */
+               lqe->lqe_edquot = false;
+       }
+
+       LQUOTA_DEBUG(lqe, "changing edquot flag");
+
+       /* let's notify slave by issuing glimpse on per-ID lock.
+        * the rebalance thread will take care of this */
+       qmt_id_lock_notify(pool->qpi_qmt, lqe);
+       EXIT;
+}
+
+/* Using least_qunit when over block softlimit will seriously impact the
+ * write performance, we need to do some special tweaking on that.
+ *
+ * \param lqe      - is the quota entry; must have a non-zero soft limit
+ * \param oversoft - set to true when granted space already exceeds the soft
+ *                   limit plus the qpi_soft_least_qunit margin
+ *
+ * \retval the limit that qunit calculation should be based on: the soft
+ *         limit itself, the hard limit once over soft, or 0 when over soft
+ *         with no hard limit set */
+static __u64 qmt_calc_softlimit(struct lquota_entry *lqe, bool *oversoft)
+{
+       struct qmt_pool_info *pool = lqe2qpi(lqe);
+
+       LASSERT(lqe->lqe_softlimit != 0);
+       *oversoft = false;
+       /* No need to do special tweaking for inode limit */
+       if (pool->qpi_key >> 16 != LQUOTA_RES_DT)
+               return lqe->lqe_softlimit;
+
+       if (lqe->lqe_granted <= lqe->lqe_softlimit +
+                               pool->qpi_soft_least_qunit) {
+               return lqe->lqe_softlimit;
+       } else if (lqe->lqe_hardlimit != 0) {
+               *oversoft = true;
+               return lqe->lqe_hardlimit;
+       } else {
+               *oversoft = true;
+               return 0;
+       }
+}
+
+/*
+ * Try to grant more quota space back to slave.
+ *
+ * \param lqe     - is the quota entry for which we would like to allocate more
+ *                  space
+ * \param granted - is how much was already granted as part of the request
+ *                  processing
+ * \param spare   - is how much unused quota space the slave already owns
+ *
+ * \retval return how additional space can be granted to the slave
+ */
+__u64 qmt_alloc_expand(struct lquota_entry *lqe, __u64 granted, __u64 spare)
+{
+       struct qmt_pool_info    *pool = lqe2qpi(lqe);
+       __u64                    remaining, qunit;
+       int                      slv_cnt;
+
+       LASSERT(lqe->lqe_enforced && lqe->lqe_qunit != 0);
+
+       slv_cnt = lqe2qpi(lqe)->qpi_slv_nr[lqe->lqe_site->lqs_qtype];
+       qunit   = lqe->lqe_qunit;
+
+       /* See comment in qmt_adjust_qunit(). LU-4139. */
+       if (lqe->lqe_softlimit != 0) {
+               bool oversoft;
+               /* base the calculation on the soft limit (or hard limit once
+                * over soft, see qmt_calc_softlimit()) */
+               remaining = qmt_calc_softlimit(lqe, &oversoft);
+               if (remaining == 0)
+                       /* over soft limit with no hard limit: allow one more
+                        * soft least qunit beyond what is already granted */
+                       remaining = lqe->lqe_granted +
+                                   pool->qpi_soft_least_qunit;
+       } else {
+               remaining = lqe->lqe_hardlimit;
+       }
+
+       if (lqe->lqe_granted >= remaining)
+               /* nothing left to hand out */
+               RETURN(0);
+
+       remaining -= lqe->lqe_granted;
+
+       do {
+               if (spare >= qunit)
+                       break;
+
+               /* keep only the part of granted below the current qunit
+                * boundary (qunit is a power of two).
+                * NOTE(review): granted is masked again below with the same
+                * qunit on the non-continue path, so one of the two masks
+                * appears redundant — confirm against upstream */
+               granted &= (qunit - 1);
+
+               if (remaining > (slv_cnt * qunit) >> 1) {
+                       /* enough room to grant more space w/o additional
+                        * shrinking ... at least for now */
+                       remaining -= (slv_cnt * qunit) >> 1;
+               } else if (qunit != pool->qpi_least_qunit) {
+                       /* not enough room at this qunit, shrink and retry */
+                       qunit >>= 2;
+                       continue;
+               }
+
+               granted &= (qunit - 1);
+               if (spare > 0)
+                       RETURN(min_t(__u64, qunit - spare, remaining));
+               else
+                       RETURN(min_t(__u64, qunit - granted, remaining));
+       } while (qunit >= pool->qpi_least_qunit);
+
+       RETURN(0);
+}
+
+/*
+ * Adjust qunit size according to quota limits and total granted count.
+ * The caller must have locked the lqe.
+ *
+ * \param env - the environment passed by the caller
+ * \param lqe - is the qid entry to be adjusted
+ */
+void qmt_adjust_qunit(const struct lu_env *env, struct lquota_entry *lqe)
+{
+       struct qmt_pool_info    *pool = lqe2qpi(lqe);
+       int                      slv_cnt;
+       __u64                    qunit, limit, qunit2 = 0;
+       ENTRY;
+
+       LASSERT(lqe_is_locked(lqe));
+
+       if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
+               /* no quota limits */
+               RETURN_EXIT;
+
+       /* record how many slaves have already registered */
+       slv_cnt = pool->qpi_slv_nr[lqe->lqe_site->lqs_qtype];
+       if (slv_cnt == 0)
+               /* wait for at least one slave to join */
+               RETURN_EXIT;
+
+       /* Qunit calculation is based on soft limit, if any, hard limit
+        * otherwise. This means that qunit is shrunk to the minimum when
+        * beyond the soft limit. This will impact performance, but that's the
+        * price of an accurate grace time management. */
+       if (lqe->lqe_softlimit != 0) {
+               bool oversoft;
+               /* As a compromise of write performance and the grace time
+                * accuracy, the block qunit size will be shrunk to
+                * qpi_soft_least_qunit when over softlimit. LU-4139. */
+               limit = qmt_calc_softlimit(lqe, &oversoft);
+               if (oversoft)
+                       qunit2 = pool->qpi_soft_least_qunit;
+               if (limit == 0)
+                       /* over soft limit with no hard limit: just use the
+                        * capped qunit computed above */
+                       GOTO(done, qunit = qunit2);
+       } else if (lqe->lqe_hardlimit != 0) {
+               limit = lqe->lqe_hardlimit;
+       } else {
+               LQUOTA_ERROR(lqe, "enforced bit set, but neither hard nor soft "
+                            "limit are set");
+               RETURN_EXIT;
+       }
+
+       /* start from the current qunit, or the least one for an entry that
+        * has never been initialized */
+       qunit = lqe->lqe_qunit == 0 ? pool->qpi_least_qunit : lqe->lqe_qunit;
+
+       /* The qunit value is computed as follows: limit / (2 * slv_cnt).
+        * Then 75% of the quota space can be granted with current qunit value.
+        * The remaining 25% are then used with reduced qunit size (by a factor
+        * of 4) which is then divided in a similar manner.
+        *
+        * |---------------------limit---------------------|
+        * |-------limit / 2-------|-limit / 4-|-limit / 4-|
+        * |qunit|qunit|qunit|qunit|           |           |
+        * |----slv_cnt * qunit----|           |           |
+        * |-grow limit-|          |           |           |
+        * |--------------shrink limit---------|           |
+        * |---space granted in qunit chunks---|-remaining-|
+        *                                    /             \
+        *                                   /               \
+        *                                  /                 \
+        *                                 /                   \
+        *                                /                     \
+        *     qunit >>= 2;            |qunit*slv_cnt|qunit*slv_cnt|
+        *                             |---space in qunit---|remain|
+        *                                  ...                               */
+       if (qunit == pool->qpi_least_qunit ||
+           limit >= lqe->lqe_granted + ((slv_cnt * qunit) >> 1)) {
+               /* current qunit value still fits, let's see if we can afford to
+                * increase qunit now ...
+                * To increase qunit again, we have to be under 25% */
+               while (qunit && limit >= lqe->lqe_granted + 6 * qunit * slv_cnt)
+                       qunit <<= 2;
+
+               if (!qunit) {
+                       /* the doubling loop overflowed __u64: fall back to
+                        * the direct limit / (2 * slv_cnt) formula */
+                       qunit = limit;
+                       do_div(qunit, 2 * slv_cnt);
+               }
+
+       } else {
+               /* shrink qunit until we find a suitable value */
+               while (qunit > pool->qpi_least_qunit &&
+                      limit < lqe->lqe_granted + ((slv_cnt * qunit) >> 1))
+                       qunit >>= 2;
+       }
+
+       /* cap qunit when over the block soft limit (LU-4139) */
+       if (qunit2 && qunit > qunit2)
+               qunit = qunit2;
done:
+       if (lqe->lqe_qunit == qunit)
+               /* keep current qunit */
+               RETURN_EXIT;
+
+       LQUOTA_DEBUG(lqe, "%s qunit to %llu",
+                    lqe->lqe_qunit < qunit ? "increasing" : "decreasing",
+                    qunit);
+
+       /* store new qunit value */
+       swap(lqe->lqe_qunit, qunit);
+
+       /* reset revoke time */
+       lqe->lqe_revoke_time = 0;
+
+       if (lqe->lqe_qunit < qunit)
+               /* let's notify slave of qunit shrinking */
+               qmt_id_lock_notify(pool->qpi_qmt, lqe);
+       else if (lqe->lqe_qunit == pool->qpi_least_qunit)
+               /* initial qunit value is the smallest one */
+               lqe->lqe_revoke_time = cfs_time_current_64();
+       EXIT;
+}
+
+/*
+ * Adjust qunit & edquot flag in case it wasn't initialized already (e.g.
+ * limit set while no slaves were connected yet)
+ *
+ * \param env - the environment passed by the caller
+ * \param lqe - is the quota entry to revalidate
+ */
+void qmt_revalidate(const struct lu_env *env, struct lquota_entry *lqe)
+{
+       if (lqe->lqe_qunit == 0) {
+               /* lqe was read from disk, but neither qunit, nor edquot flag
+                * were initialized */
+               qmt_adjust_qunit(env, lqe);
+               /* qunit may still be 0 if no slave registered yet; only
+                * recompute edquot once qunit is valid */
+               if (lqe->lqe_qunit != 0)
+                       qmt_adjust_edquot(lqe, cfs_time_current_sec());
+       }
+}