/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2012, 2016, Intel Corporation.
 * Use is subject to license terms.
 *
 * Author: Johann Lombardi
 * Author: Niu Yawei
 */

#define DEBUG_SUBSYSTEM S_LQUOTA

#include "qmt_internal.h"

/*
 * Initialize qmt-specific fields of quota entry.
 *
 * Clears the revoke time and initializes the entry's rw-semaphore. The
 * entry must be a master entry (asserted).
 *
 * \param lqe - is the quota entry to initialize
 * \param arg - is the pointer to the qmt_pool_info structure (not used by
 *              this function)
 */
static void qmt_lqe_init(struct lquota_entry *lqe, void *arg)
{
	LASSERT(lqe_is_master(lqe));

	lqe->lqe_revoke_time = 0;
	init_rwsem(&lqe->lqe_sem);
}

/* Apply the default quota setting to the specified quota entry
 *
 * \param env		- is the environment passed by the caller
 * \param pool		- is the quota pool of the quota entry
 * \param lqe		- is the lquota_entry object to apply default quota on
 * \param create_record	- if true, a global quota record will be created and
 *			  written to the disk.
* * \retval 0 : success * \retval -ve : other appropriate errors */ int qmt_lqe_set_default(const struct lu_env *env, struct qmt_pool_info *pool, struct lquota_entry *lqe, bool create_record) { struct lquota_entry *lqe_def; int rc = 0; ENTRY; if (lqe->lqe_id.qid_uid == 0) RETURN(0); lqe_def = pool->qpi_grace_lqe[lqe_qtype(lqe)]; LQUOTA_DEBUG(lqe, "inherit default quota"); lqe->lqe_is_default = true; lqe->lqe_hardlimit = lqe_def->lqe_hardlimit; lqe->lqe_softlimit = lqe_def->lqe_softlimit; if (create_record) { lqe->lqe_uptodate = true; rc = qmt_set_with_lqe(env, pool->qpi_qmt, lqe, 0, 0, LQUOTA_GRACE_FLAG(0, LQUOTA_FLAG_DEFAULT), QIF_TIMES, true, false); if (rc != 0) LQUOTA_ERROR(lqe, "failed to create the global quota" " record: %d", rc); } if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0) lqe->lqe_enforced = false; else lqe->lqe_enforced = true; RETURN(rc); } /* * Update a lquota entry. This is done by reading quota settings from the global * index. The lquota entry must be write locked. 
* * \param env - the environment passed by the caller * \param lqe - is the quota entry to refresh * \param arg - is the pointer to the qmt_pool_info structure * \param find - don't create lqe on disk in case of ENOENT if true */ static int qmt_lqe_read(const struct lu_env *env, struct lquota_entry *lqe, void *arg, bool find) { struct qmt_thread_info *qti = qmt_info(env); struct qmt_pool_info *pool = (struct qmt_pool_info *)arg; int rc; ENTRY; LASSERT(lqe_is_master(lqe)); /* read record from disk */ rc = lquota_disk_read(env, pool->qpi_glb_obj[lqe->lqe_site->lqs_qtype], &lqe->lqe_id, (struct dt_rec *)&qti->qti_glb_rec); switch (rc) { case -ENOENT: if (find) RETURN(-ENOENT); qmt_lqe_set_default(env, pool, lqe, true); break; case 0: /* copy quota settings from on-disk record */ lqe->lqe_granted = qti->qti_glb_rec.qbr_granted; lqe->lqe_hardlimit = qti->qti_glb_rec.qbr_hardlimit; lqe->lqe_softlimit = qti->qti_glb_rec.qbr_softlimit; lqe->lqe_gracetime = LQUOTA_GRACE(qti->qti_glb_rec.qbr_time); if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0 && (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) & LQUOTA_FLAG_DEFAULT)) qmt_lqe_set_default(env, pool, lqe, false); break; default: LQUOTA_ERROR(lqe, "failed to read quota entry from disk, rc:%d", rc); RETURN(rc); } if (lqe->lqe_id.qid_uid == 0 || (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0)) /* {hard,soft}limit=0 means no quota enforced */ lqe->lqe_enforced = false; else lqe->lqe_enforced = true; if (qmt_pool_global(pool)) lqe->lqe_is_global = 1; LQUOTA_DEBUG(lqe, "read"); RETURN(0); } /* * Print lqe information for debugging. 
* * \param lqe - is the quota entry to debug * \param arg - is the pointer to the qmt_pool_info structure * \param msgdata - debug message * \param fmt - format of debug message */ static void qmt_lqe_debug(struct lquota_entry *lqe, void *arg, struct libcfs_debug_msg_data *msgdata, struct va_format *vaf) { struct qmt_pool_info *pool = (struct qmt_pool_info *)arg; libcfs_debug_msg(msgdata, "%pV qmt:%s pool:%s-%s id:%llu enforced:%d hard:%llu soft:%llu granted:%llu time:%llu qunit: %llu edquot:%d may_rel:%llu revoke:%lld default:%s\n", vaf, pool->qpi_qmt->qmt_svname, RES_NAME(pool->qpi_rtype), pool->qpi_name, lqe->lqe_id.qid_uid, lqe->lqe_enforced, lqe->lqe_hardlimit, lqe->lqe_softlimit, lqe->lqe_granted, lqe->lqe_gracetime, lqe->lqe_qunit, lqe->lqe_edquot, lqe->lqe_may_rel, lqe->lqe_revoke_time, lqe->lqe_is_default ? "yes" : "no"); } /* * Vector of quota entry operations supported on the master */ struct lquota_entry_operations qmt_lqe_ops = { .lqe_init = qmt_lqe_init, .lqe_read = qmt_lqe_read, .lqe_debug = qmt_lqe_debug, }; /* * Reserve enough credits to update records in both the global index and * the slave index identified by \slv_obj * * \param env - is the environment passed by the caller * \param lqe - is the quota entry associated with the identifier * subject to the change. If it is NULL lqes array is * taken from env with qti_lqes_env(env). 
* \param slv_obj - is the dt_object associated with the index file * \param sync - make transaction sync if true */ struct thandle *qmt_trans_start_with_slv(const struct lu_env *env, struct lquota_entry *lqe, struct dt_object *slv_obj, bool sync) { struct qmt_device *qmt; struct thandle *th; struct lquota_entry **lqes; struct qmt_lqe_restore *restore; int rc, i, lqes_cnt; ENTRY; restore = qti_lqes_rstr(env); if (!lqe) { lqes_cnt = qti_lqes_cnt(env); lqes = qti_lqes(env); } else { lqes_cnt = 1; lqes = &lqe; } /* qmt is the same for all lqes, so take it from the 1st */ qmt = lqe2qpi(lqes[0])->qpi_qmt; if (slv_obj != NULL) LQUOTA_DEBUG(lqes[0], "declare write for slv "DFID, PFID(lu_object_fid(&slv_obj->do_lu))); /* start transaction */ th = dt_trans_create(env, qmt->qmt_child); if (IS_ERR(th)) RETURN(th); if (sync) /* quota settings on master are updated synchronously for the * time being */ th->th_sync = 1; /* reserve credits for global index update */ for (i = 0; i < lqes_cnt; i++) { rc = lquota_disk_declare_write(env, th, LQE_GLB_OBJ(lqes[i]), &lqes[i]->lqe_id); if (rc) GOTO(out, rc); } if (slv_obj != NULL) { /* reserve credits for slave index update */ rc = lquota_disk_declare_write(env, th, slv_obj, &lqe->lqe_id); if (rc) GOTO(out, rc); } /* start transaction */ rc = dt_trans_start_local(env, qmt->qmt_child, th); if (rc) GOTO(out, rc); EXIT; out: if (rc) { dt_trans_stop(env, qmt->qmt_child, th); th = ERR_PTR(rc); LQUOTA_ERROR(lqe, "failed to slv declare write for "DFID ", rc:%d", PFID(lu_object_fid(&slv_obj->do_lu)), rc); } else { for (i = 0; i < lqes_cnt; i++) { restore[i].qlr_hardlimit = lqes[i]->lqe_hardlimit; restore[i].qlr_softlimit = lqes[i]->lqe_softlimit; restore[i].qlr_gracetime = lqes[i]->lqe_gracetime; restore[i].qlr_granted = lqes[i]->lqe_granted; restore[i].qlr_qunit = lqes[i]->lqe_qunit; } } return th; } /* * Reserve enough credits to update a record in the global index * * \param env - is the environment passed by the caller * \param lqe - is the 
quota entry to be modified in the global index * \param restore - is a temporary storage for current quota settings which will * be restored if something goes wrong at index update time. */ struct thandle *qmt_trans_start(const struct lu_env *env, struct lquota_entry *lqe) { LQUOTA_DEBUG(lqe, "declare write"); return qmt_trans_start_with_slv(env, lqe, NULL, true); } int qmt_glb_write_lqes(const struct lu_env *env, struct thandle *th, __u32 flags, __u64 *ver) { int i, rc; rc = 0; for (i = 0; i < qti_lqes_cnt(env); i++) { rc = qmt_glb_write(env, th, qti_lqes(env)[i], flags, ver); if (rc) break; } return rc; } /* * Update record associated with a quota entry in the global index. * If LQUOTA_BUMP_VER is set, then the global index version must also be * bumped. * The entry must be at least read locked, dirty and up-to-date. * * \param env - the environment passed by the caller * \param th - is the transaction handle to be used for the disk writes * \param lqe - is the quota entry to udpate * \param obj - is the dt_object associated with the index file * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER. * \param ver - is used to return the new version of the index. * * \retval - 0 on success and lqe dirty flag cleared, * appropriate error on failure and uptodate flag cleared. */ int qmt_glb_write(const struct lu_env *env, struct thandle *th, struct lquota_entry *lqe, __u32 flags, __u64 *ver) { struct qmt_thread_info *qti = qmt_info(env); struct lquota_glb_rec *rec; int rc; ENTRY; LASSERT(lqe != NULL); LASSERT(lqe_is_master(lqe)); LASSERT(lqe_is_locked(lqe)); LASSERT(lqe->lqe_uptodate); LASSERT((flags & ~(LQUOTA_BUMP_VER | LQUOTA_SET_VER)) == 0); LQUOTA_DEBUG(lqe, "write glb"); /* never delete the entry even when the id isn't enforced and * no any guota granted, otherwise, this entry will not be * synced to slave during the reintegration. 
*/ rec = &qti->qti_glb_rec; /* fill global index with updated quota settings */ rec->qbr_granted = lqe->lqe_granted; if (lqe->lqe_is_default) { rec->qbr_hardlimit = 0; rec->qbr_softlimit = 0; rec->qbr_time = LQUOTA_GRACE_FLAG(0, LQUOTA_FLAG_DEFAULT); } else { rec->qbr_hardlimit = lqe->lqe_hardlimit; rec->qbr_softlimit = lqe->lqe_softlimit; rec->qbr_time = lqe->lqe_gracetime; } /* write new quota settings */ rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id, (struct dt_rec *)rec, flags, ver); if (rc) /* we failed to write the new quota settings to disk, report * error to caller who will restore the initial value */ LQUOTA_ERROR(lqe, "failed to update global index, rc:%d", rc); RETURN(rc); } /* * Read from disk how much quota space is allocated to a slave. * This is done by reading records from the dedicated slave index file. * Return in \granted how much quota space is currently allocated to the * slave. * The entry must be at least read locked. * * \param env - the environment passed by the caller * \param lqe_id - is the quota id associated with the identifier to look-up * in the slave index * \param slv_obj - is the dt_object associated with the slave index * \param granted - is the output parameter where to return how much space * is granted to the slave. 
* * \retval - 0 on success, appropriate error on failure */ int qmt_slv_read(const struct lu_env *env, union lquota_id *qid, struct dt_object *slv_obj, __u64 *granted) { struct qmt_thread_info *qti = qmt_info(env); struct lquota_slv_rec *slv_rec = &qti->qti_slv_rec; int rc; ENTRY; CDEBUG(D_QUOTA, "read id:%llu form slv "DFID"\n", qid->qid_uid, PFID(lu_object_fid(&slv_obj->do_lu))); /* read slave record from disk */ rc = lquota_disk_read(env, slv_obj, qid, (struct dt_rec *)slv_rec); switch (rc) { case -ENOENT: *granted = 0; break; case 0: /* extract granted from on-disk record */ *granted = slv_rec->qsr_granted; break; default: CERROR("Failed to read slave record for %llu from "DFID"\n", qid->qid_uid, PFID(lu_object_fid(&slv_obj->do_lu))); RETURN(rc); } CDEBUG(D_QUOTA, "Successful slv read %llu\n", *granted); RETURN(0); } /* * Update record in slave index file. * The entry must be at least read locked. * * \param env - the environment passed by the caller * \param th - is the transaction handle to be used for the disk writes * \param lqe - is the dirty quota entry which will be updated at the same time * as the slave index * \param slv_obj - is the dt_object associated with the slave index * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER. * \param ver - is used to return the new version of the index. * \param granted - is the new amount of quota space owned by the slave * * \retval - 0 on success, appropriate error on failure */ int qmt_slv_write(const struct lu_env *env, struct thandle *th, struct lquota_entry *lqe, struct dt_object *slv_obj, __u32 flags, __u64 *ver, __u64 granted) { struct qmt_thread_info *qti = qmt_info(env); struct lquota_slv_rec *rec; int rc; ENTRY; LASSERT(lqe != NULL); LASSERT(lqe_is_master(lqe)); LASSERT(lqe_is_locked(lqe)); LQUOTA_DEBUG(lqe, "write slv "DFID" granted:%llu", PFID(lu_object_fid(&slv_obj->do_lu)), granted); /* never delete the entry, otherwise, it'll not be transferred * to slave during reintegration. 
*/ rec = &qti->qti_slv_rec; /* updated space granted to this slave */ rec->qsr_granted = granted; /* write new granted space */ rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id, (struct dt_rec *)rec, flags, ver); if (rc) { LQUOTA_ERROR(lqe, "failed to update slave index "DFID" granted:%llu", PFID(lu_object_fid(&slv_obj->do_lu)), granted); RETURN(rc); } RETURN(0); } /* * Check whether new limits are valid for this pool * * \param lqe - is the quota entry subject to the setquota * \param hard - is the new hard limit * \param soft - is the new soft limit */ int qmt_validate_limits(struct lquota_entry *lqe, __u64 hard, __u64 soft) { ENTRY; if (hard != 0 && soft > hard) /* soft limit must be less than hard limit */ RETURN(-EINVAL); RETURN(0); } /* * Set/clear edquot flag after quota space allocation/release or settings * change. Slaves will be notified of changes via glimpse on per-ID lock * * \param lqe - is the quota entry to check * \param now - is the current time in second used for grace time managment */ bool qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now) { struct qmt_pool_info *pool = lqe2qpi(lqe); ENTRY; if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0) RETURN(false); if (!lqe->lqe_edquot) { /* space exhausted flag not set, let's check whether it is time * to set the flag */ if (!qmt_space_exhausted(lqe, now)) /* the qmt still has available space */ RETURN(false); /* See comment in qmt_adjust_qunit(). 
LU-4139 */ if (qmt_hard_exhausted(lqe) || pool->qpi_rtype != LQUOTA_RES_DT) { time64_t lapse; /* we haven't reached the minimal qunit yet so there is * still hope that the rebalancing process might free * up some quota space */ if (lqe->lqe_qunit != pool->qpi_least_qunit) RETURN(false); /* least qunit value not sent to all slaves yet */ if (lqe->lqe_revoke_time == 0) RETURN(false); /* Let's give more time to slave to release space */ lapse = ktime_get_seconds() - QMT_REBA_TIMEOUT; if (lqe->lqe_may_rel != 0 && lqe->lqe_revoke_time > lapse) RETURN(false); } else { if (lqe->lqe_qunit > pool->qpi_soft_least_qunit) RETURN(false); } /* set edquot flag */ lqe->lqe_edquot = true; } else { /* space exhausted flag set, let's check whether it is time to * clear it */ if (qmt_space_exhausted(lqe, now)) /* the qmt still has not space */ RETURN(false); if (lqe->lqe_hardlimit != 0 && lqe->lqe_granted + pool->qpi_least_qunit > lqe->lqe_hardlimit) /* we clear the flag only once at least one least qunit * is available */ RETURN(false); /* clear edquot flag */ lqe->lqe_edquot = false; } LQUOTA_DEBUG(lqe, "changing edquot flag"); /* let's notify slave by issuing glimpse on per-ID lock. * the rebalance thread will take care of this */ RETURN(true); } /* Using least_qunit when over block softlimit will seriously impact the * write performance, we need to do some special tweaking on that. */ static __u64 qmt_calc_softlimit(struct lquota_entry *lqe, bool *oversoft) { struct qmt_pool_info *pool = lqe2qpi(lqe); LASSERT(lqe->lqe_softlimit != 0); *oversoft = false; /* No need to do special tweaking for inode limit */ if (pool->qpi_rtype != LQUOTA_RES_DT) return lqe->lqe_softlimit; if (lqe->lqe_granted <= lqe->lqe_softlimit + pool->qpi_soft_least_qunit) { return lqe->lqe_softlimit; } else if (lqe->lqe_hardlimit != 0) { *oversoft = true; return lqe->lqe_hardlimit; } else { *oversoft = true; return 0; } } /* * Try to grant more quota space back to slave. 
 *
 * \param lqe     - is the quota entry for which we would like to allocate
 *                  more space; it must be enforced and have a non-zero
 *                  qunit (asserted)
 * \param granted - is how much was already granted as part of the request
 *                  processing
 * \param spare   - is how much unused quota space the slave already owns
 *
 * \retval return how much additional space can be granted to the slave
 */
__u64 qmt_alloc_expand(struct lquota_entry *lqe, __u64 granted, __u64 spare)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	__u64 remaining, qunit;
	int slv_cnt;

	LASSERT(lqe->lqe_enforced && lqe->lqe_qunit != 0);

	slv_cnt = qpi_slv_nr(lqe2qpi(lqe), lqe_qtype(lqe));
	qunit = lqe->lqe_qunit;

	/* See comment in qmt_adjust_qunit(). LU-4139. */
	if (lqe->lqe_softlimit != 0) {
		bool oversoft;

		/* "remaining" first holds the limit to allocate against */
		remaining = qmt_calc_softlimit(lqe, &oversoft);
		if (remaining == 0)
			/* no usable limit returned: allow one more
			 * qpi_soft_least_qunit on top of what is granted */
			remaining = lqe->lqe_granted +
				    pool->qpi_soft_least_qunit;
	} else {
		remaining = lqe->lqe_hardlimit;
	}

	/* the limit is already reached, nothing more to grant */
	if (lqe->lqe_granted >= remaining)
		RETURN(0);

	/* from now on "remaining" is the space left below the limit */
	remaining -= lqe->lqe_granted;

	do {
		/* the slave already owns at least one qunit of spare space */
		if (spare >= qunit)
			break;

		/* keep only the bits of granted below qunit
		 * NOTE(review): this is a modulo only if qunit is a power of
		 * two - confirm */
		granted &= (qunit - 1);

		if (remaining > (slv_cnt * qunit) >> 1) {
			/* enough room to grant more space w/o additional
			 * shrinking ... at least for now */
			remaining -= (slv_cnt * qunit) >> 1;
		} else if (qunit != pool->qpi_least_qunit) {
			/* retry with a qunit 4 times smaller; "continue"
			 * jumps to the loop condition below */
			qunit >>= 2;
			continue;
		}

		granted &= (qunit - 1);
		/* top up to a full qunit, bounded by what is left */
		if (spare > 0)
			RETURN(min_t(__u64, qunit - spare, remaining));
		else
			RETURN(min_t(__u64, qunit - granted, remaining));
	} while (qunit >= pool->qpi_least_qunit);

	RETURN(0);
}

/*
 * Adjust qunit size according to quota limits and total granted count.
 * The caller must have locked the lqe.
 *
 * \param env - the environment passed by the caller
 * \param lqe - is the qid entry to be adjusted
 *
 * \retval true  - qunit was changed, need reseed glbe array
 * \retval false - qunit was left unchanged
 */
bool qmt_adjust_qunit(const struct lu_env *env, struct lquota_entry *lqe)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	bool need_reseed = false;
	int slv_cnt;
	/* qunit2, when non-zero, is an upper bound applied to the new qunit */
	__u64 qunit, limit, qunit2 = 0;
	ENTRY;

	LASSERT(lqe_is_locked(lqe));

	if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
		/* no quota limits */
		RETURN(need_reseed);

	/* record how many slaves have already registered */
	slv_cnt = qpi_slv_nr(pool, lqe_qtype(lqe));
	if (slv_cnt == 0) {
		/* Pool hasn't slaves anymore. Qunit will be adjusted
		 * again when new slaves would be added. */
		if (lqe->lqe_qunit) {
			qunit = 0;
			GOTO(done, qunit);
		}
		/* wait for at least one slave to join */
		RETURN(need_reseed);
	}

	/* Qunit calculation is based on soft limit, if any, hard limit
	 * otherwise. This means that qunit is shrunk to the minimum when
	 * beyond the soft limit. This will impact performance, but that's the
	 * price of an accurate grace time management. */
	if (lqe->lqe_softlimit != 0) {
		bool oversoft;
		/* As a compromise of write performance and the grace time
		 * accuracy, the block qunit size will be shrunk to
		 * qpi_soft_least_qunit when over softlimit. LU-4139. */
		limit = qmt_calc_softlimit(lqe, &oversoft);
		if (oversoft)
			qunit2 = pool->qpi_soft_least_qunit;
		if (limit == 0)
			/* no limit to compute from: use qunit2 directly */
			GOTO(done, qunit = qunit2);
	} else if (lqe->lqe_hardlimit != 0) {
		limit = lqe->lqe_hardlimit;
	} else {
		LQUOTA_ERROR(lqe, "enforced bit set, but neither hard nor soft "
			     "limit are set");
		RETURN(need_reseed);
	}

	/* start from the current qunit, or from the least qunit if the
	 * current one is not set yet */
	qunit = lqe->lqe_qunit == 0 ? pool->qpi_least_qunit : lqe->lqe_qunit;

	/* The qunit value is computed as follows: limit / (2 * slv_cnt).
	 * Then 75% of the quota space can be granted with current qunit value.
	 * The remaining 25% are then used with reduced qunit size (by a factor
	 * of 4) which is then divided in a similar manner.
	 *
	 * |---------------------limit---------------------|
	 * |-------limit / 2-------|-limit / 4-|-limit / 4-|
	 * |qunit|qunit|qunit|qunit|           |           |
	 * |----slv_cnt * qunit----|           |           |
	 * |-grow limit-|          |           |           |
	 * |--------------shrink limit---------|           |
	 * |---space granted in qunit chunks---|-remaining-|
	 *                                    /             \
	 *                                   /               \
	 *                                  /                 \
	 *                                 /                   \
	 *                                /                     \
	 *     qunit >>= 2;            |qunit*slv_cnt|qunit*slv_cnt|
	 *                             |---space in qunit---|remain|
	 *                                  ...                               */
	if (qunit == pool->qpi_least_qunit ||
	    limit >= lqe->lqe_granted + ((slv_cnt * qunit) >> 1)) {
		/* current qunit value still fits, let's see if we can afford to
		 * increase qunit now ...
		 * To increase qunit again, we have to be under 25% */
		while (qunit && limit >= lqe->lqe_granted + 6 * qunit * slv_cnt)
			qunit <<= 2;

		/* qunit reached 0 in the loop above (the shift dropped all
		 * set bits): fall back to limit / (2 * slv_cnt) */
		if (!qunit) {
			qunit = limit;
			do_div(qunit, 2 * slv_cnt);
		}

	} else {
		/* shrink qunit until we find a suitable value */
		while (qunit > pool->qpi_least_qunit &&
		       limit < lqe->lqe_granted + ((slv_cnt * qunit) >> 1))
			qunit >>= 2;
	}

	/* cap the result when over the soft limit */
	if (qunit2 && qunit > qunit2)
		qunit = qunit2;
done:
	if (lqe->lqe_qunit == qunit)
		/* keep current qunit */
		RETURN(need_reseed);

	LQUOTA_DEBUG(lqe, "%s qunit to %llu",
		     lqe->lqe_qunit < qunit ? "increasing" : "decreasing",
		     qunit);

	/* store new qunit value; after the swap the local variable qunit
	 * holds the previous value */
	swap(lqe->lqe_qunit, qunit);

	/* reseed glbe array and notify
	 * slave if qunit was shrunk */
	need_reseed = true;
	/* reset revoke time */
	lqe->lqe_revoke_time = 0;

	/* new qunit is not smaller than the previous one and equals the
	 * least qunit */
	if (lqe->lqe_qunit >= qunit &&
	    (lqe->lqe_qunit == pool->qpi_least_qunit)) {
		/* initial qunit value is the smallest one */
		lqe->lqe_revoke_time = ktime_get_seconds();
	}
	RETURN(need_reseed);
}

/*
 * Adjust qunit and/or edquot flag of every quota entry stored in the env,
 * then reseed the global lqe's per-slave array and notify through the ID
 * lock if anything changed.
 *
 * \param env      - the environment passed by the caller
 * \param qmt      - is the quota master target device
 * \param now      - is the current time passed to qmt_adjust_edquot()
 * \param edquot   - if true, adjust the edquot flag of each entry
 * \param qunit    - if true, adjust the qunit of each entry
 * \param qb_flags - request flags tested with req_has_rep()/req_is_rel()
 *
 * \retval true if at least one entry was changed
 */
bool qmt_adjust_edquot_qunit_notify(const struct lu_env *env,
				    struct qmt_device *qmt,
				    __u64 now, bool edquot,
				    bool qunit, __u32 qb_flags)
{
	struct lquota_entry *lqe_gl, *lqe;
	bool need_reseed = false;
	int i;

	lqe_gl = qti_lqes_glbl(env);

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		lqe = qti_lqes(env)[i];
		if (qunit)
			need_reseed |= qmt_adjust_qunit(env, lqe);
		if (edquot)
			need_reseed |= qmt_adjust_edquot(lqe, now);
	}

	LASSERT(lqe_gl);
	/* without global data there is nothing to reseed; for requests
	 * matching req_has_rep()/req_is_rel() only warn and bail out */
	if (!lqe_gl->lqe_glbl_data &&
	    (req_has_rep(qb_flags) || req_is_rel(qb_flags))) {
		if (need_reseed)
			CWARN("%s: can't notify - lge_glbl_data is not set",
			      qmt->qmt_svname);
		return need_reseed;
	}

	if (lqe_gl->lqe_glbl_data && need_reseed) {
		qmt_seed_glbe_all(env, lqe_gl->lqe_glbl_data, qunit, edquot);
		qmt_id_lock_notify(qmt, lqe_gl);
	}
	return need_reseed;
}

/*
 * Adjust qunit & edquot flag in case it wasn't initialized already (e.g.
* limit set while no slaves were connected yet) */ bool qmt_revalidate(const struct lu_env *env, struct lquota_entry *lqe) { bool need_notify = false; if (lqe->lqe_qunit == 0) { /* lqe was read from disk, but neither qunit, nor edquot flag * were initialized */ need_notify = qmt_adjust_qunit(env, lqe); if (lqe->lqe_qunit != 0) need_notify |= qmt_adjust_edquot(lqe, ktime_get_real_seconds()); } return need_notify; } void qmt_revalidate_lqes(const struct lu_env *env, struct qmt_device *qmt, __u32 qb_flags) { struct lquota_entry *lqe_gl = qti_lqes_glbl(env); bool need_notify = false; int i; for (i = 0; i < qti_lqes_cnt(env); i++) need_notify |= qmt_revalidate(env, qti_lqes(env)[i]); /* There could be no ID lock to the moment of reconciliation. * As a result lqe global data is not initialised yet. It is ok * for release and report requests. */ if (!lqe_gl->lqe_glbl_data && (req_is_rel(qb_flags) || req_has_rep(qb_flags))) return; if (need_notify) { qmt_seed_glbe(env, lqe_gl->lqe_glbl_data); qmt_id_lock_notify(qmt, lqe_gl); } } void qti_lqes_init(const struct lu_env *env) { struct qmt_thread_info *qti = qmt_info(env); qti->qti_lqes_cnt = 0; qti->qti_glbl_lqe_idx = 0; qti->qti_lqes_num = QMT_MAX_POOL_NUM; } int qti_lqes_add(const struct lu_env *env, struct lquota_entry *lqe) { struct qmt_thread_info *qti = qmt_info(env); if (qti->qti_lqes_cnt > qti->qti_lqes_num) { struct lquota_entry **lqes; lqes = qti->qti_lqes; OBD_ALLOC(lqes, sizeof(lqe) * qti->qti_lqes_num * 2); if (!lqes) return -ENOMEM; memcpy(lqes, qti_lqes(env), qti->qti_lqes_cnt * sizeof(lqe)); /* Don't need to free, if it is the very 1st allocation */ if (qti->qti_lqes_num > QMT_MAX_POOL_NUM) OBD_FREE(qti->qti_lqes, qti->qti_lqes_num * sizeof(lqe)); qti->qti_lqes = lqes; qti->qti_lqes_num *= 2; } if (lqe->lqe_is_global) qti->qti_glbl_lqe_idx = qti->qti_lqes_cnt; qti_lqes(env)[qti->qti_lqes_cnt++] = lqe; /* The pool could be accessed directly from lqe, so take * extra reference that is put in qti_lqes_fini */ 
qpi_getref(lqe2qpi(lqe)); CDEBUG(D_QUOTA, "LQE %p %lu is added, lqe_cnt %d lqes_num %d\n", lqe, (long unsigned)lqe->lqe_id.qid_uid, qti->qti_lqes_cnt, qti->qti_lqes_num); LASSERT(qti->qti_lqes_num != 0); return 0; } void qti_lqes_del(const struct lu_env *env, int index) { struct lquota_entry **lqes; int lqes_cnt = qti_lqes_cnt(env); int lqep_size = sizeof(struct lquota_entry *); if (index == 0) { /* We can't handle non global lqes correctly without * global lqe located at index 0. If we try to do so, * something goes wrong. */ LQUOTA_ERROR(qti_lqes_glbl(env), "quota: cannot remove lqe at index 0 as it is global"); LASSERT(qti_lqes_glbl(env)->lqe_is_global); return; } lqes = qti_lqes(env); qpi_putref(env, lqe2qpi(lqes[index])); lqe_putref(lqes[index]); memcpy((unsigned char *)lqes + index * lqep_size, (unsigned char *)lqes + (index + 1) * lqep_size, (lqes_cnt - index - 1) * lqep_size); qti_lqes_cnt(env)--; } void qti_lqes_fini(const struct lu_env *env) { struct qmt_thread_info *qti = qmt_info(env); struct lquota_entry **lqes = qti->qti_lqes; int i; lqes = qti_lqes(env); for (i = 0; i < qti->qti_lqes_cnt; i++) { qpi_putref(env, lqe2qpi(lqes[i])); lqe_putref(lqes[i]); } if (qti->qti_lqes_num > QMT_MAX_POOL_NUM) OBD_FREE(qti->qti_lqes, qti->qti_lqes_num * sizeof(struct lquota_entry *)); } inline int qti_lqes_min_qunit(const struct lu_env *env) { int i, min, qunit; for (i = 1, min = qti_lqe_qunit(env, 0); i < qti_lqes_cnt(env); i++) { qunit = qti_lqe_qunit(env, i); if (qunit < min) min = qunit; } return min; } inline int qti_lqes_edquot(const struct lu_env *env) { int i; for (i = 0; i < qti_lqes_cnt(env); i++) { if (qti_lqes(env)[i]->lqe_edquot) return 1; } return 0; } inline int qti_lqes_restore_init(const struct lu_env *env) { int rc = 0; if (qti_lqes_cnt(env) > QMT_MAX_POOL_NUM) { OBD_ALLOC(qmt_info(env)->qti_lqes_rstr, qti_lqes_cnt(env) * sizeof(struct qmt_lqe_restore)); if (!qti_lqes_rstr(env)) rc = -ENOMEM; } return rc; } inline void qti_lqes_restore_fini(const 
struct lu_env *env) { if (qti_lqes_cnt(env) > QMT_MAX_POOL_NUM) OBD_FREE(qmt_info(env)->qti_lqes_rstr, qti_lqes_cnt(env) * sizeof(struct qmt_lqe_restore)); } inline void qti_lqes_write_lock(const struct lu_env *env) { int i; for (i = 0; i < qti_lqes_cnt(env); i++) lqe_write_lock(qti_lqes(env)[i]); } inline void qti_lqes_write_unlock(const struct lu_env *env) { int i; for (i = 0; i < qti_lqes_cnt(env); i++) lqe_write_unlock(qti_lqes(env)[i]); } #define QMT_INIT_SLV_CNT 64 struct lqe_glbl_data *qmt_alloc_lqe_gd(struct qmt_pool_info *pool, int qtype) { struct lqe_glbl_data *lgd; struct lqe_glbl_entry *lqeg_arr; int slv_cnt, glbe_num; OBD_ALLOC(lgd, sizeof(struct lqe_glbl_data)); if (!lgd) RETURN(NULL); slv_cnt = qpi_slv_nr_by_rtype(pool, qtype); glbe_num = slv_cnt < QMT_INIT_SLV_CNT ? QMT_INIT_SLV_CNT : slv_cnt; OBD_ALLOC(lqeg_arr, sizeof(struct lqe_glbl_entry) * glbe_num); if (!lqeg_arr) { OBD_FREE(lgd, sizeof(struct lqe_glbl_data)); RETURN(NULL); } CDEBUG(D_QUOTA, "slv_cnt %d glbe_num %d\n", slv_cnt, glbe_num); lgd->lqeg_num_used = slv_cnt; lgd->lqeg_num_alloc = glbe_num; lgd->lqeg_arr = lqeg_arr; RETURN(lgd); } void qmt_free_lqe_gd(struct lqe_glbl_data *lgd) { OBD_FREE(lgd->lqeg_arr, sizeof(struct lqe_glbl_entry) * lgd->lqeg_num_alloc); OBD_FREE(lgd, sizeof(struct lqe_glbl_data)); } void qmt_seed_glbe_all(const struct lu_env *env, struct lqe_glbl_data *lgd, bool qunit, bool edquot) { struct rw_semaphore *sem = NULL; struct qmt_pool_info *qpi; int i, j, idx; ENTRY; /* lqes array is sorted by qunit - the first entry has minimum qunit. * Thus start seeding global qunit's array beginning from the 1st lqe * and appropriate pool. If pools overlapped, slaves from this * overlapping get minimum qunit value. 
* user1: pool1, pool2, pool_glbl; * pool1: OST1; user1_qunit = 10M; * pool2: OST0, OST1, OST2; user1_qunit = 30M; * pool_glbl: OST0, OST1, OST2, OST3; user1_qunit = 160M; * qunit array after seeding should be: * OST0: 30M; OST1: 10M; OST2: 30M; OST3: 160M; */ /* edquot resetup algorythm works fine * with not sorted lqes */ if (qunit) qmt_lqes_sort(env); for (i = 0; i < lgd->lqeg_num_used; i++) { lgd->lqeg_arr[i].lge_qunit_set = 0; lgd->lqeg_arr[i].lge_qunit_nu = 0; lgd->lqeg_arr[i].lge_edquot_nu = 0; } for (i = 0; i < qti_lqes_cnt(env); i++) { struct lquota_entry *lqe = qti_lqes(env)[i]; int slaves_cnt; CDEBUG(D_QUOTA, "lqes_cnt %d, i %d\n", qti_lqes_cnt(env), i); qpi = lqe2qpi(lqe); if (qmt_pool_global(qpi)) { slaves_cnt = qpi_slv_nr_by_rtype(lqe2qpi(lqe), lqe_qtype(lqe)); } else { sem = qmt_sarr_rwsem(qpi); down_read(sem); slaves_cnt = qmt_sarr_count(qpi); } for (j = 0; j < slaves_cnt; j++) { idx = qmt_sarr_get_idx(qpi, j); LASSERT(idx >= 0); if (edquot) { int lge_edquot, new_edquot, edquot_nu; lge_edquot = lgd->lqeg_arr[idx].lge_edquot; edquot_nu = lgd->lqeg_arr[idx].lge_edquot_nu; new_edquot = lqe->lqe_edquot; if (lge_edquot == new_edquot || (edquot_nu && lge_edquot == 1)) goto qunit_lbl; lgd->lqeg_arr[idx].lge_edquot = new_edquot; /* it is needed for the following case: * initial values for idx i - * lqe_edquot = 1, lqe_edquot_nu == 0; * 1: new_edquot == 0 -> * lqe_edquot = 0, lqe_edquot_nu = 1; * 2: new_edquot == 1 -> * lqe_edquot = 1, lqe_edquot_nu = 0; * At the 2nd iteration lge_edquot comes back * to 1, so no changes and we don't need * to notify slave. 
*/ lgd->lqeg_arr[idx].lge_edquot_nu = !edquot_nu; } qunit_lbl: if (qunit) { __u64 lge_qunit, new_qunit; CDEBUG(D_QUOTA, "idx %d lge_qunit_set %d lge_qunit %llu new_qunit %llu\n", idx, lgd->lqeg_arr[idx].lge_qunit_set, lgd->lqeg_arr[idx].lge_qunit, lqe->lqe_qunit); /* lge for this idx is already set * on previous iteration */ if (lgd->lqeg_arr[idx].lge_qunit_set) continue; lge_qunit = lgd->lqeg_arr[idx].lge_qunit; new_qunit = lqe->lqe_qunit; /* qunit could be not set, * so use global lqe's qunit */ if (!new_qunit) continue; if (lge_qunit != new_qunit) lgd->lqeg_arr[idx].lge_qunit = new_qunit; /* TODO: initially slaves notification was done * only for qunit shrinking. Should we always * notify slaves with new qunit ? */ if (lge_qunit > new_qunit) lgd->lqeg_arr[idx].lge_qunit_nu = 1; lgd->lqeg_arr[idx].lge_qunit_set = 1; } } if (!qmt_pool_global(qpi)) up_read(sem); } /* TODO: only for debug purposes - remove it later */ for (i = 0; i < lgd->lqeg_num_used; i++) CDEBUG(D_QUOTA, "lgd ost %d, qunit %lu nu %d; edquot %d nu %d\n", i, (long unsigned)lgd->lqeg_arr[i].lge_qunit, lgd->lqeg_arr[i].lge_qunit_nu, lgd->lqeg_arr[i].lge_edquot, lgd->lqeg_arr[i].lge_edquot_nu); EXIT; } void qmt_setup_lqe_gd(const struct lu_env *env, struct qmt_device *qmt, struct lquota_entry *lqe, struct lqe_glbl_data *lgd, int pool_type) { __u64 qunit; bool edquot; int i; qunit = lqe->lqe_qunit; edquot = lqe->lqe_edquot; /* Firstly set all elements in array with * qunit and edquot of global pool */ for (i = 0; i < lgd->lqeg_num_used; i++) { lgd->lqeg_arr[i].lge_qunit = qunit; lgd->lqeg_arr[i].lge_edquot = edquot; /* It is the very first lvb setup - qunit and other flags * will be sent to slaves during qmt_lvbo_fill. */ lgd->lqeg_arr[i].lge_qunit_nu = 0; lgd->lqeg_arr[i].lge_edquot_nu = 0; } qmt_pool_lqes_lookup_spec(env, qmt, pool_type, lqe_qtype(lqe), &lqe->lqe_id); qmt_seed_glbe(env, lgd); lqe->lqe_glbl_data = lgd; qmt_id_lock_notify(qmt, lqe); qti_lqes_fini(env); }