// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2012, 2016, Intel Corporation.
 * Use is subject to license terms.
 *
 * Author: Johann Lombardi <johann.lombardi@intel.com>
 * Author: Niu Yawei <yawei.niu@intel.com>
 */

#define DEBUG_SUBSYSTEM S_LQUOTA

#include "qmt_internal.h"

static void qmt_work_lvbo_free(struct work_struct *work)
{
	struct lqe_glbl_data *lgd;
	struct lquota_entry *lqe;

	lqe = container_of(work, struct lquota_entry, lqe_work);
	mutex_lock(&lqe->lqe_glbl_data_lock);
	lgd = lqe->lqe_glbl_data;
	lqe->lqe_glbl_data = NULL;
	mutex_unlock(&lqe->lqe_glbl_data_lock);
	qmt_free_lqe_gd(lgd);

	if (unlikely(lgd == NULL)) {
		struct qmt_pool_info *pool;

		pool = (struct qmt_pool_info *)lqe->lqe_site->lqs_parent;
		CWARN("%s: lvbo for (id=%llx) not fully inited\n",
		      pool->qpi_qmt->qmt_svname,
		      lqe->lqe_id.qid_uid);
	}

	/* release lqe reference */
	lqe_putref(lqe);
}

/*
 * Initialize qmt-specific fields of quota entry.
 *
 * \param lqe - is the quota entry to initialize
 * \param arg - is the pointer to the qmt_pool_info structure
 */
static void qmt_lqe_init(struct lquota_entry *lqe, void *arg)
{
	LASSERT(lqe_is_master(lqe));

	lqe->lqe_revoke_time = 0;
	init_rwsem(&lqe->lqe_sem);
	mutex_init(&lqe->lqe_glbl_data_lock);
	INIT_WORK(&lqe->lqe_work, qmt_work_lvbo_free);
}

/*
 * Apply the default quota setting to the specified quota entry.
 *
 * \param env           - is the environment passed by the caller
 * \param pool          - is the quota pool of the quota entry
 * \param lqe           - is the lquota_entry object to apply default quota on
 * \param create_record - if true, a global quota record will be created and
 *                        written to disk
 *
 * \retval 0   : success
 * \retval -ve : other appropriate errors
 */
int qmt_lqe_set_default(const struct lu_env *env, struct qmt_pool_info *pool,
			struct lquota_entry *lqe, bool create_record)
{
	struct lquota_entry *lqe_def;
	int rc = 0;

	ENTRY;

	if (lqe->lqe_id.qid_uid == 0)
		RETURN(0);

	lqe_def = pool->qpi_grace_lqe[lqe_qtype(lqe)];

	LQUOTA_DEBUG(lqe, "inherit default quota");

	lqe->lqe_is_default = true;
	lqe->lqe_hardlimit = lqe_def->lqe_hardlimit;
	lqe->lqe_softlimit = lqe_def->lqe_softlimit;

	if (create_record) {
		lqe->lqe_uptodate = true;
		rc = qmt_set_with_lqe(env, pool->qpi_qmt, lqe, 0, 0,
				      LQUOTA_GRACE_FLAG(0, LQUOTA_FLAG_DEFAULT),
				      QIF_TIMES, true, false);
		if (rc != 0)
			LQUOTA_ERROR(lqe, "failed to create the global quota record: %d",
				     rc);
	}

	if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0)
		lqe->lqe_enforced = false;
	else
		lqe->lqe_enforced = true;

	RETURN(rc);
}

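/*
 * Illustrative sketch of the inheritance above (invented values, not part
 * of the build): if the pool-wide default for this quota type is
 * hard=10G/soft=8G, an id with no explicit limits picks up those values,
 * is marked lqe_is_default = true and, when create_record is set, gets a
 * global record flagged LQUOTA_FLAG_DEFAULT so the inheritance survives a
 * restart:
 *
 *	rc = qmt_lqe_set_default(env, pool, lqe, true);
 *	// lqe->lqe_hardlimit == 10G, lqe->lqe_softlimit == 8G,
 *	// lqe->lqe_enforced == true
 */
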
/*
 * Update a lquota entry. This is done by reading quota settings from the
 * global index. The lquota entry must be write locked.
 *
 * \param env  - the environment passed by the caller
 * \param lqe  - is the quota entry to refresh
 * \param arg  - is the pointer to the qmt_pool_info structure
 * \param find - don't create lqe on disk in case of ENOENT if true
 */
static int qmt_lqe_read(const struct lu_env *env, struct lquota_entry *lqe,
			void *arg, bool find)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;
	int rc;

	ENTRY;

	LASSERT(lqe_is_master(lqe));

	if (qmt_pool_global(pool))
		lqe->lqe_is_global = 1;

	/* read record from disk */
	rc = lquota_disk_read(env, pool->qpi_glb_obj[lqe->lqe_site->lqs_qtype],
			      &lqe->lqe_id, (struct dt_rec *)&qti->qti_glb_rec);

	switch (rc) {
	case -ENOENT:
		if (find)
			RETURN(-ENOENT);
		qmt_lqe_set_default(env, pool, lqe, true);
		break;
	case 0:
		/* copy quota settings from on-disk record */
		lqe->lqe_granted = qti->qti_glb_rec.qbr_granted;
		lqe->lqe_hardlimit = qti->qti_glb_rec.qbr_hardlimit;
		lqe->lqe_softlimit = qti->qti_glb_rec.qbr_softlimit;
		lqe->lqe_gracetime = LQUOTA_GRACE(qti->qti_glb_rec.qbr_time);

		if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0 &&
		    (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) &
		     LQUOTA_FLAG_DEFAULT))
			qmt_lqe_set_default(env, pool, lqe, false);
		else if (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) &
			 LQUOTA_FLAG_RESET)
			lqe->lqe_is_reset = true;
		break;
	default:
		LQUOTA_ERROR(lqe, "failed to read quota entry from disk, rc:%d",
			     rc);
		RETURN(rc);
	}

	if (lqe->lqe_id.qid_uid == 0 ||
	    (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0))
		/* {hard,soft}limit=0 means no quota enforced */
		lqe->lqe_enforced = false;
	else
		lqe->lqe_enforced = true;

	LQUOTA_DEBUG(lqe, "read");
	RETURN(0);
}

/*
 * Print lqe information for debugging.
 *
 * \param lqe     - is the quota entry to debug
 * \param arg     - is the pointer to the qmt_pool_info structure
 * \param msgdata - debug message
 * \param fmt     - format of debug message
 */
static void qmt_lqe_debug(struct lquota_entry *lqe, void *arg,
			  struct libcfs_debug_msg_data *msgdata,
			  struct va_format *vaf)
{
	struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;

	libcfs_debug_msg(msgdata,
			 "%pV qmt:%s pool:%s-%s id:%llu enforced:%d hard:%llu soft:%llu granted:%llu time:%llu qunit: %llu edquot:%d may_rel:%llu revoke:%lld default:%s\n",
			 vaf, pool->qpi_qmt->qmt_svname,
			 RES_NAME(pool->qpi_rtype),
			 pool->qpi_name,
			 lqe->lqe_id.qid_uid, lqe->lqe_enforced,
			 lqe->lqe_hardlimit, lqe->lqe_softlimit,
			 lqe->lqe_granted, lqe->lqe_gracetime,
			 lqe->lqe_qunit, lqe->lqe_edquot, lqe->lqe_may_rel,
			 lqe->lqe_revoke_time,
			 lqe->lqe_is_default ? "yes" : "no");
}

/*
 * Vector of quota entry operations supported on the master
 */
const struct lquota_entry_operations qmt_lqe_ops = {
	.lqe_init	= qmt_lqe_init,
	.lqe_read	= qmt_lqe_read,
	.lqe_debug	= qmt_lqe_debug,
};

/*
 * Reserve enough credits to update records in both the global index and
 * the slave index identified by \slv_obj.
 *
 * \param env     - is the environment passed by the caller
 * \param lqe     - is the quota entry associated with the identifier
 *                  subject to the change. If it is NULL, the lqes array is
 *                  taken from env with qti_lqes_env(env).
 * \param slv_obj - is the dt_object associated with the index file
 * \param sync    - make transaction sync if true
 */
struct thandle *qmt_trans_start_with_slv(const struct lu_env *env,
					 struct lquota_entry *lqe,
					 struct dt_object *slv_obj,
					 bool sync)
{
	struct qmt_device *qmt;
	struct thandle *th;
	struct lquota_entry **lqes;
	struct qmt_lqe_restore *restore;
	int rc, i, lqes_cnt;

	ENTRY;

	restore = qti_lqes_rstr(env);
	if (!lqe) {
		lqes_cnt = qti_lqes_cnt(env);
		lqes = qti_lqes(env);
	} else {
		lqes_cnt = 1;
		lqes = &lqe;
	}

	/* qmt is the same for all lqes, so take it from the 1st */
	qmt = lqe2qpi(lqes[0])->qpi_qmt;

	if (slv_obj != NULL)
		LQUOTA_DEBUG(lqes[0], "declare write for slv "DFID,
			     PFID(lu_object_fid(&slv_obj->do_lu)));

	/* start transaction */
	th = dt_trans_create(env, qmt->qmt_child);
	if (IS_ERR(th))
		RETURN(th);

	if (sync)
		/* quota settings on master are updated synchronously for the
		 * time being */
		th->th_sync = 1;

	/* reserve credits for global index update */
	for (i = 0; i < lqes_cnt; i++) {
		rc = lquota_disk_declare_write(env, th,
					       LQE_GLB_OBJ(lqes[i]),
					       &lqes[i]->lqe_id);
		if (rc)
			GOTO(out, rc);
	}

	if (slv_obj != NULL) {
		/* reserve credits for slave index update */
		rc = lquota_disk_declare_write(env, th, slv_obj,
					       &lqes[0]->lqe_id);
		if (rc)
			GOTO(out, rc);
	}

	/* start transaction */
	rc = dt_trans_start_local(env, qmt->qmt_child, th);
	if (rc)
		GOTO(out, rc);

	EXIT;
out:
	if (rc) {
		dt_trans_stop(env, qmt->qmt_child, th);
		th = ERR_PTR(rc);
		LQUOTA_ERROR(lqes[0], "failed to slv declare write for "DFID
			     ", rc:%d", PFID(lu_object_fid(&slv_obj->do_lu)),
			     rc);
	} else {
		for (i = 0; i < lqes_cnt; i++) {
			restore[i].qlr_hardlimit = lqes[i]->lqe_hardlimit;
			restore[i].qlr_softlimit = lqes[i]->lqe_softlimit;
			restore[i].qlr_gracetime = lqes[i]->lqe_gracetime;
			restore[i].qlr_granted = lqes[i]->lqe_granted;
			restore[i].qlr_qunit = lqes[i]->lqe_qunit;
		}
	}
	return th;
}

/*
 * Reserve enough credits to update a record in the global index.
 *
 * \param env - is the environment passed by the caller
 * \param lqe - is the quota entry to be modified in the global index
 */
struct thandle *qmt_trans_start(const struct lu_env *env,
				struct lquota_entry *lqe)
{
	LQUOTA_DEBUG(lqe, "declare write");
	return qmt_trans_start_with_slv(env, lqe, NULL, true);
}

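/*
 * Typical caller pattern, sketched for illustration only (error handling
 * trimmed; the real sequence lives in the setquota path, e.g.
 * qmt_set_with_lqe()):
 *
 *	th = qmt_trans_start(env, lqe);
 *	if (IS_ERR(th))
 *		RETURN(PTR_ERR(th));
 *	rc = qmt_glb_write(env, th, lqe, LQUOTA_BUMP_VER, &ver);
 *	dt_trans_stop(env, qmt->qmt_child, th);
 */
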
int qmt_glb_write_lqes(const struct lu_env *env, struct thandle *th,
		       __u32 flags, __u64 *ver)
{
	int rc = 0, i;

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		rc = qmt_glb_write(env, th, qti_lqes(env)[i], flags, ver);
		if (rc)
			break;
	}
	return rc;
}

/*
 * Update record associated with a quota entry in the global index.
 * If LQUOTA_BUMP_VER is set, then the global index version must also be
 * bumped.
 * The entry must be at least read locked, dirty and up-to-date.
 *
 * \param env   - the environment passed by the caller
 * \param th    - is the transaction handle to be used for the disk writes
 * \param lqe   - is the quota entry to update
 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
 * \param ver   - is used to return the new version of the index.
 *
 * \retval - 0 on success and lqe dirty flag cleared,
 *           appropriate error on failure and uptodate flag cleared.
 */
int qmt_glb_write(const struct lu_env *env, struct thandle *th,
		  struct lquota_entry *lqe, __u32 flags, __u64 *ver)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_glb_rec *rec;
	int rc;

	ENTRY;

	LASSERT(lqe != NULL);
	LASSERT(lqe_is_master(lqe));
	LASSERT(lqe_is_locked(lqe));
	LASSERT(lqe->lqe_uptodate);
	LASSERT((flags & ~(LQUOTA_BUMP_VER | LQUOTA_SET_VER)) == 0);

	LQUOTA_DEBUG(lqe, "write glb");

	/* never delete the entry, even when the id isn't enforced and
	 * no quota is granted, otherwise this entry will not be
	 * synced to slaves during reintegration. */
	rec = &qti->qti_glb_rec;

	/* fill global index with updated quota settings */
	rec->qbr_granted = lqe->lqe_granted;
	if (lqe->lqe_is_default) {
		rec->qbr_hardlimit = 0;
		rec->qbr_softlimit = 0;
		rec->qbr_time = LQUOTA_GRACE_FLAG(lqe->lqe_gracetime,
						  LQUOTA_FLAG_DEFAULT);
	} else if (lqe->lqe_is_reset) {
		rec->qbr_hardlimit = 0;
		rec->qbr_softlimit = 0;
		rec->qbr_granted = 0;
		rec->qbr_time = LQUOTA_GRACE_FLAG(lqe->lqe_gracetime,
						  LQUOTA_FLAG_RESET);
	} else {
		rec->qbr_hardlimit = lqe->lqe_hardlimit;
		rec->qbr_softlimit = lqe->lqe_softlimit;
		rec->qbr_time = lqe->lqe_gracetime;
	}

	/* write new quota settings */
	rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
			       (struct dt_rec *)rec, flags, ver);
	if (rc)
		/* we failed to write the new quota settings to disk, report
		 * error to caller who will restore the initial value */
		LQUOTA_ERROR(lqe, "failed to update global index, rc:%d", rc);

	RETURN(rc);
}

/*
 * Read from disk how much quota space is allocated to a slave.
 * This is done by reading records from the dedicated slave index file.
 * Return in \granted how much quota space is currently allocated to the
 * slave.
 * The entry must be at least read locked.
 *
 * \param env     - the environment passed by the caller
 * \param qid     - is the quota id associated with the identifier to look up
 *                  in the slave index
 * \param slv_obj - is the dt_object associated with the slave index
 * \param granted - is the output parameter where to return how much space
 *                  is granted to the slave.
 *
 * \retval - 0 on success, appropriate error on failure
 */
int qmt_slv_read(const struct lu_env *env, union lquota_id *qid,
		 struct dt_object *slv_obj, __u64 *granted)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_slv_rec *slv_rec = &qti->qti_slv_rec;
	int rc;

	ENTRY;

	CDEBUG(D_QUOTA, "read id:%llu from slv "DFID"\n",
	       qid->qid_uid, PFID(lu_object_fid(&slv_obj->do_lu)));

	/* read slave record from disk */
	rc = lquota_disk_read(env, slv_obj, qid,
			      (struct dt_rec *)slv_rec);
	switch (rc) {
	case -ENOENT:
		*granted = 0;
		break;
	case 0:
		/* extract granted from on-disk record */
		*granted = slv_rec->qsr_granted;
		break;
	default:
		CERROR("Failed to read slave record for %llu from "DFID"\n",
		       qid->qid_uid, PFID(lu_object_fid(&slv_obj->do_lu)));
		RETURN(rc);
	}

	CDEBUG(D_QUOTA, "Successful slv read %llu\n", *granted);

	RETURN(0);
}

/*
 * Update record in slave index file.
 * The entry must be at least read locked.
 *
 * \param env     - the environment passed by the caller
 * \param th      - is the transaction handle to be used for the disk writes
 * \param lqe     - is the dirty quota entry which will be updated at the same
 *                  time as the slave index
 * \param slv_obj - is the dt_object associated with the slave index
 * \param flags   - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
 * \param ver     - is used to return the new version of the index.
 * \param granted - is the new amount of quota space owned by the slave
 *
 * \retval - 0 on success, appropriate error on failure
 */
int qmt_slv_write(const struct lu_env *env, struct thandle *th,
		  struct lquota_entry *lqe, struct dt_object *slv_obj,
		  __u32 flags, __u64 *ver, __u64 granted)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_slv_rec *rec;
	int rc;

	ENTRY;

	LASSERT(lqe != NULL);
	LASSERT(lqe_is_master(lqe));
	LASSERT(lqe_is_locked(lqe));

	LQUOTA_DEBUG(lqe, "write slv "DFID" granted:%llu",
		     PFID(lu_object_fid(&slv_obj->do_lu)), granted);

	/* never delete the entry, otherwise it'll not be transferred
	 * to the slave during reintegration. */
	rec = &qti->qti_slv_rec;

	/* update space granted to this slave */
	rec->qsr_granted = granted;

	/* write new granted space */
	rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
			       (struct dt_rec *)rec, flags, ver);
	if (rc) {
		LQUOTA_ERROR(lqe,
			     "failed to update slave index "DFID" granted:%llu",
			     PFID(lu_object_fid(&slv_obj->do_lu)),
			     granted);
		RETURN(rc);
	}

	RETURN(0);
}

/*
 * Check whether new limits are valid for this pool.
 *
 * \param lqe  - is the quota entry subject to the setquota
 * \param hard - is the new hard limit
 * \param soft - is the new soft limit
 */
int qmt_validate_limits(struct lquota_entry *lqe, __u64 hard, __u64 soft)
{
	ENTRY;

	if (hard != 0 && soft > hard)
		/* soft limit must be less than hard limit */
		RETURN(-EINVAL);
	RETURN(0);
}

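/*
 * Example (illustrative values): qmt_validate_limits(lqe, 100, 200) fails
 * with -EINVAL because the soft limit exceeds the hard one, while
 * hard == 0 (unlimited) accepts any soft limit.
 */
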
/*
 * Set/clear edquot flag after quota space allocation/release or settings
 * change. Slaves will be notified of changes via glimpse on per-ID lock.
 *
 * \param lqe - is the quota entry to check
 * \param now - is the current time in seconds used for grace time management
 */
bool qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	ENTRY;

	if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
		RETURN(false);

	if (!lqe->lqe_edquot) {
		/* space exhausted flag not set, let's check whether it is time
		 * to set the flag */

		if (!qmt_space_exhausted(lqe, now))
			/* the qmt still has available space */
			RETURN(false);

		/* See comment in qmt_adjust_qunit(). LU-4139 */
		if (qmt_hard_exhausted(lqe) ||
		    pool->qpi_rtype != LQUOTA_RES_DT) {
			time64_t lapse;

			/* we haven't reached the minimal qunit yet so there is
			 * still hope that the rebalancing process might free
			 * up some quota space */
			if (lqe->lqe_qunit != pool->qpi_least_qunit)
				RETURN(false);

			/* least qunit value not sent to all slaves yet */
			if (lqe->lqe_revoke_time == 0 &&
			    !lqe->lqe_gl && list_empty(&lqe->lqe_link)) {
				/* LU-16736: the revoke_time should be set when
				 * the qunit reaches the least qunit; the quota
				 * LDLM lock could encounter some issue, so set
				 * it to avoid an endless wait in the QSD. */
				LQUOTA_ERROR(lqe, "set revoke_time explicitly");

				lqe->lqe_revoke_time = ktime_get_seconds();
				RETURN(false);
			}

			/* Let's give the slaves more time to release space */
			lapse = ktime_get_seconds() - QMT_REBA_TIMEOUT;
			if (lqe->lqe_may_rel != 0 && lqe->lqe_revoke_time > lapse)
				RETURN(false);
		} else {
			if (lqe->lqe_qunit > pool->qpi_soft_least_qunit)
				RETURN(false);
		}

		/* set edquot flag */
		lqe->lqe_edquot = true;
	} else {
		/* space exhausted flag set, let's check whether it is time to
		 * clear it */

		if (qmt_space_exhausted(lqe, now))
			/* the qmt still has no space */
			RETURN(false);

		if (lqe->lqe_hardlimit != 0 &&
		    lqe->lqe_granted + pool->qpi_least_qunit >
							lqe->lqe_hardlimit)
			/* we clear the flag only when at least one least qunit
			 * is available */
			RETURN(false);

		/* clear edquot flag */
		lqe->lqe_edquot = false;
	}

	LQUOTA_DEBUG(lqe, "changing edquot flag");

	/* let's notify slaves by issuing glimpse on per-ID lock.
	 * the rebalance thread will take care of this */
	RETURN(true);
}

/* Using least_qunit when over the block soft limit will seriously impact
 * write performance, so we need to do some special tweaking for that case. */
static __u64 qmt_calc_softlimit(struct lquota_entry *lqe, bool *oversoft)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);

	LASSERT(lqe->lqe_softlimit != 0);
	*oversoft = false;
	/* No need to do special tweaking for inode limit */
	if (pool->qpi_rtype != LQUOTA_RES_DT)
		return lqe->lqe_softlimit;

	if (lqe->lqe_granted <= lqe->lqe_softlimit +
				pool->qpi_soft_least_qunit) {
		return lqe->lqe_softlimit;
	} else if (lqe->lqe_hardlimit != 0) {
		*oversoft = true;
		return lqe->lqe_hardlimit;
	} else {
		*oversoft = true;
		return 0;
	}
}

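/*
 * Worked example for the tweak above (invented numbers): with
 * softlimit=100M, hardlimit=200M and qpi_soft_least_qunit=4M, the
 * function keeps returning 100M while granted <= 104M. Past that point
 * it returns the 200M hard limit with *oversoft = true, so
 * qmt_adjust_qunit() computes qunit against the hard limit but caps it
 * at qpi_soft_least_qunit, instead of collapsing straight to the least
 * qunit, which would throttle writes much harder.
 */
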
/*
 * Try to grant more quota space back to slave.
 *
 * \param lqe     - is the quota entry for which we would like to allocate more
 *                  space
 * \param granted - is how much was already granted as part of the request
 * \param spare   - is how much unused quota space the slave already owns
 *
 * \retval - how much additional space can be granted to the slave
 */
__u64 qmt_alloc_expand(struct lquota_entry *lqe, __u64 granted, __u64 spare)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	__u64 remaining, qunit;
	int slv_cnt;

	LASSERT(lqe->lqe_enforced && lqe->lqe_qunit != 0);

	slv_cnt = qpi_slv_nr(lqe2qpi(lqe), lqe_qtype(lqe));
	qunit = lqe->lqe_qunit;

	/* See comment in qmt_adjust_qunit(). LU-4139 */
	if (lqe->lqe_softlimit != 0) {
		bool oversoft;

		remaining = qmt_calc_softlimit(lqe, &oversoft);
		if (remaining == 0)
			remaining = lqe->lqe_granted +
				    pool->qpi_soft_least_qunit;
	} else {
		remaining = lqe->lqe_hardlimit;
	}

	if (lqe->lqe_granted >= remaining)
		RETURN(0);

	remaining -= lqe->lqe_granted;

	do {
		if (spare >= qunit)
			break;

		granted &= (qunit - 1);

		if (remaining > (slv_cnt * qunit) >> 1) {
			/* enough room to grant more space w/o additional
			 * shrinking ... at least for now */
			remaining -= (slv_cnt * qunit) >> 1;
		} else if (qunit != pool->qpi_least_qunit) {
			/* try again with a smaller qunit */
			qunit >>= 2;
			continue;
		}

		granted &= (qunit - 1);
		if (spare > 0)
			RETURN(min_t(__u64, qunit - spare, remaining));
		else
			RETURN(min_t(__u64, qunit - granted, remaining));
	} while (qunit >= pool->qpi_least_qunit);

	RETURN(0);
}

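/*
 * Worked example (invented numbers, no soft limit): hardlimit=1000,
 * lqe_granted=600, slv_cnt=4, qunit=128, spare=0 and granted=30 in the
 * current request. remaining = 1000 - 600 = 400 > (4 * 128) / 2 = 256,
 * so 256 is set aside (remaining becomes 144) and the slave can be
 * granted min(128 - 30, 144) = 98 more units without shrinking qunit.
 */
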
static void
qmt_adjust_qunit_set_revoke(const struct lu_env *env, struct lquota_entry *lqe,
			    unsigned long least_qunit)
{
	struct lquota_entry *lqe2;
	time64_t min = 0;
	int i;

	if (qti_lqes_cnt(env) <= 1)
		return;

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		lqe2 = qti_lqes(env)[i];
		if ((lqe2->lqe_qunit == least_qunit) && lqe2->lqe_revoke_time) {
			if (!min) {
				min = lqe2->lqe_revoke_time;
				continue;
			}
			min = lqe2->lqe_revoke_time < min ?
				lqe2->lqe_revoke_time : min;
		}
	}

	lqe->lqe_revoke_time = min;
}

/*
 * Adjust qunit size according to quota limits and total granted count.
 * The caller must have locked the lqe.
 *
 * \param env - the environment passed by the caller
 * \param lqe - is the qid entry to be adjusted
 *
 * \retval true - need to reseed the glbe array
 */
bool qmt_adjust_qunit(const struct lu_env *env, struct lquota_entry *lqe)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	bool need_reseed = false;
	int slv_cnt;
	__u64 qunit, limit, qunit2 = 0;

	ENTRY;

	LASSERT(lqe_is_locked(lqe));

	if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
		/* no quota limits */
		RETURN(need_reseed);

	/* record how many slaves have already registered */
	slv_cnt = qpi_slv_nr(pool, lqe_qtype(lqe));
	if (slv_cnt == 0) {
		/* The pool has no slaves anymore. Qunit will be adjusted
		 * again when new slaves are added. */
		if (lqe->lqe_qunit) {
			qunit = lqe->lqe_qunit = 0;
			need_reseed = true;
		}
		/* wait for at least one slave to join */
		RETURN(need_reseed);
	}

	/* Qunit calculation is based on the soft limit, if any, and on the
	 * hard limit otherwise. This means that qunit is shrunk to the minimum
	 * when beyond the soft limit. This will impact performance, but that's
	 * the price of an accurate grace time management. */
	if (lqe->lqe_softlimit != 0) {
		bool oversoft;

		/* As a compromise between write performance and grace time
		 * accuracy, the block qunit size is shrunk to
		 * qpi_soft_least_qunit when over the soft limit. LU-4139. */
		limit = qmt_calc_softlimit(lqe, &oversoft);
		if (oversoft)
			qunit2 = pool->qpi_soft_least_qunit;
		if (limit == 0)
			GOTO(done, qunit = qunit2);
	} else if (lqe->lqe_hardlimit != 0) {
		limit = lqe->lqe_hardlimit;
	} else {
		LQUOTA_ERROR(lqe, "enforced bit set, but neither hard nor soft limit are set");
		RETURN(need_reseed);
	}

	qunit = lqe->lqe_qunit == 0 ? pool->qpi_least_qunit : lqe->lqe_qunit;

	/* The qunit value is computed as follows: limit / (2 * slv_cnt).
	 * Then 75% of the quota space can be granted with the current qunit
	 * value. The remaining 25% are then used with a reduced qunit size
	 * (by a factor of 4) which is then divided in a similar manner.
	 *
	 * |---------------------limit---------------------|
	 * |-------limit / 2-------|-limit / 4-|-limit / 4-|
	 * |qunit|qunit|qunit|qunit|           |           |
	 * |----slv_cnt * qunit----|           |           |
	 * |-grow limit-|          |           |           |
	 * |--------------shrink limit---------|           |
	 * |---space granted in qunit chunks---|-remaining-|
	 *                                    / \
	 *                                   /   \
	 *                                  /     \
	 *                                 /       \
	 *                                /         \
	 *      qunit >>= 2;            |qunit*slv_cnt|qunit*slv_cnt|
	 *                              |---space in qunit---|remain|
	 */

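	/* Worked example of the scheme above (invented numbers): limit=1024,
	 * slv_cnt=4. The initial qunit is limit / (2 * slv_cnt) = 128, so
	 * 4 * 128 = 512 (half the limit) can be granted at full qunit. Once
	 * granted exceeds limit - (slv_cnt * qunit) / 2 = 768, the shrink
	 * loop below divides qunit by 4 down to 32, and the remaining 25% is
	 * handed out in those smaller chunks, down to qpi_least_qunit in the
	 * worst case. */
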
	if (qunit == pool->qpi_least_qunit ||
	    limit >= lqe->lqe_granted + ((slv_cnt * qunit) >> 1)) {
		/* current qunit value still fits, let's see if we can afford
		 * to increase qunit now ...
		 * To increase qunit again, we have to be under 25% */
		while (qunit && limit >= lqe->lqe_granted + 6 * qunit * slv_cnt)
			qunit <<= 2;

		if (!qunit) {
			qunit = limit;
			do_div(qunit, 2 * slv_cnt);
		}
	} else {
		/* shrink qunit until we find a suitable value */
		while (qunit > pool->qpi_least_qunit &&
		       limit < lqe->lqe_granted + ((slv_cnt * qunit) >> 1))
			qunit >>= 2;
	}

	if (qunit2 && qunit > qunit2)
		qunit = qunit2;
done:
	if (lqe->lqe_qunit == qunit)
		/* keep current qunit */
		RETURN(need_reseed);

	LQUOTA_DEBUG(lqe, "%s qunit to %llu",
		     lqe->lqe_qunit < qunit ? "increasing" : "decreasing",
		     qunit);

	/* store new qunit value */
	swap(lqe->lqe_qunit, qunit);

	/* reseed the glbe array and notify
	 * the slave if qunit was shrunk */
	need_reseed = true;
	/* reset revoke time */
	lqe->lqe_revoke_time = 0;

	if (lqe->lqe_qunit == pool->qpi_least_qunit) {
		if (lqe->lqe_qunit >= qunit)
			/* initial qunit value is the smallest one */
			lqe->lqe_revoke_time = ktime_get_seconds();
		/* If there are several lqes and lqe_revoke_time is set for
		 * some of them, it means the appropriate OSTs have already
		 * been notified with the least qunit and there is no chance
		 * to free more space. Find the lqe with the minimum (earliest)
		 * revoke_time and set this time to the current one.
		 */
		qmt_adjust_qunit_set_revoke(env, lqe, pool->qpi_least_qunit);
	}
	RETURN(need_reseed);
}

bool qmt_adjust_edquot_qunit_notify(const struct lu_env *env,
				    struct qmt_device *qmt,
				    __u64 now, bool edquot,
				    bool qunit, __u32 qb_flags,
				    int idx)
{
	struct lquota_entry *lqe_gl, *lqe;
	bool reseed = false;
	bool notify = false;
	int i;

	lqe_gl = qti_lqes_glbl(env);

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		lqe = qti_lqes(env)[i];
		if (qunit)
			reseed |= qmt_adjust_qunit(env, lqe);
		if (edquot)
			reseed |= qmt_adjust_edquot(lqe, now);
	}

	if (!lqe_gl->lqe_glbl_data &&
	    (req_has_rep(qb_flags) || req_is_rel(qb_flags))) {
		if (reseed)
			CDEBUG(D_QUOTA,
			       "%s: cannot notify - lqe_glbl_data is not set\n",
			       qmt->qmt_svname);
		return reseed;
	}

	if (reseed || idx >= 0) {
		mutex_lock(&lqe_gl->lqe_glbl_data_lock);
		if (lqe_gl->lqe_glbl_data) {
			struct lqe_glbl_data *lgd = lqe_gl->lqe_glbl_data;

			if (reseed) {
				qmt_seed_glbe_all(env, lgd, qunit, edquot,
						  false);
			} else if (idx >= 0) {
				int lge_idx = qmt_map_lge_idx(lgd, idx);

				LASSERT(lge_idx >= 0);
				/* If there are no locks yet when
				 * lge_qunit/edquot_nu is set, slaves
				 * are still not notified with the new
				 * qunit/edquot value. In such a case
				 * we need to notify them with the new values
				 * to avoid an endless EINPROGRESS if qunit is
				 * equal to the least qunit, but
				 * lqe_revoke_time is still not set. */
				notify = lgd->lqeg_arr[lge_idx].lge_qunit_nu ||
					 lgd->lqeg_arr[lge_idx].lge_edquot_nu;
			}
		}
		mutex_unlock(&lqe_gl->lqe_glbl_data_lock);
	}

	if (reseed || notify)
		qmt_id_lock_notify(qmt, lqe_gl);

	return reseed;
}

/*
 * Adjust qunit & edquot flag in case they were not initialized already
 * (e.g. limit set while no slaves were connected yet).
 */
bool qmt_revalidate(const struct lu_env *env, struct lquota_entry *lqe)
{
	bool need_notify = false;

	if (lqe->lqe_qunit == 0) {
		/* lqe was read from disk, but neither qunit, nor edquot flag
		 * were initialized */
		need_notify = qmt_adjust_qunit(env, lqe);
		if (lqe->lqe_qunit != 0)
			need_notify |= qmt_adjust_edquot(lqe,
						ktime_get_real_seconds());
	}

	return need_notify;
}

void qmt_revalidate_lqes(const struct lu_env *env,
			 struct qmt_device *qmt, __u32 qb_flags)
{
	struct lquota_entry *lqe_gl = qti_lqes_glbl(env);
	bool need_notify = false;
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++)
		need_notify |= qmt_revalidate(env, qti_lqes(env)[i]);

	if (!need_notify)
		return;

	/* There could be no ID lock yet at the moment of reconciliation.
	 * As a result lqe global data is not initialised yet. It is ok
	 * for release and report requests. */
	if (!lqe_gl->lqe_glbl_data &&
	    (req_is_rel(qb_flags) || req_has_rep(qb_flags))) {
		return;
	}

	mutex_lock(&lqe_gl->lqe_glbl_data_lock);
	if (lqe_gl->lqe_glbl_data)
		qmt_seed_glbe(env, lqe_gl->lqe_glbl_data, false);
	mutex_unlock(&lqe_gl->lqe_glbl_data_lock);

	qmt_id_lock_notify(qmt, lqe_gl);
}

void qti_lqes_init(const struct lu_env *env)
{
	struct qmt_thread_info *qti = qmt_info(env);

	qti->qti_lqes_cnt = 0;
	qti->qti_glbl_lqe_idx = 0;
	qti->qti_lqes_num = QMT_MAX_POOL_NUM;
}

int qti_lqes_add(const struct lu_env *env, struct lquota_entry *lqe)
{
	struct qmt_thread_info *qti = qmt_info(env);

	if (qti->qti_lqes_cnt >= qti->qti_lqes_num) {
		struct lquota_entry **lqes;

		OBD_ALLOC(lqes, sizeof(lqe) * qti->qti_lqes_num * 2);
		if (!lqes)
			return -ENOMEM;
		memcpy(lqes, qti_lqes(env), qti->qti_lqes_cnt * sizeof(lqe));
		/* No need to free if it is the very 1st allocation */
		if (qti->qti_lqes_num > QMT_MAX_POOL_NUM)
			OBD_FREE(qti->qti_lqes,
				 qti->qti_lqes_num * sizeof(lqe));
		qti->qti_lqes = lqes;
		qti->qti_lqes_num *= 2;
	}

	if (lqe->lqe_is_global)
		qti->qti_glbl_lqe_idx = qti->qti_lqes_cnt;
	qti_lqes(env)[qti->qti_lqes_cnt++] = lqe;

	/* The pool could be accessed directly from lqe, so take an
	 * extra reference that is put in qti_lqes_fini */
	qpi_getref(lqe2qpi(lqe));

	CDEBUG(D_QUOTA, "LQE %px %lu is added, lqe_cnt %d lqes_num %d\n",
	       lqe, (long unsigned)lqe->lqe_id.qid_uid,
	       qti->qti_lqes_cnt, qti->qti_lqes_num);
	LASSERT(qti->qti_lqes_num != 0);

	return 0;
}

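/*
 * Growth policy sketch (illustrative, under the assumption that the
 * initial array is embedded in qmt_thread_info, which is why the first
 * spill skips OBD_FREE): the array starts with QMT_MAX_POOL_NUM slots
 * and doubles on demand, e.g. cnt == num == QMT_MAX_POOL_NUM triggers an
 * OBD_ALLOC of 2 * num slots, a memcpy of the old entries and num *= 2.
 */
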
void qti_lqes_del(const struct lu_env *env, int index)
{
	struct lquota_entry **lqes;
	int lqes_cnt = qti_lqes_cnt(env);
	int lqep_size = sizeof(struct lquota_entry *);

	if (index == 0) {
		/* We can't handle non-global lqes correctly without
		 * the global lqe located at index 0. If we try to do so,
		 * something goes wrong. */
		LQUOTA_ERROR(qti_lqes_glbl(env),
			     "quota: cannot remove lqe at index 0 as it is global");
		LASSERT(qti_lqes_glbl(env)->lqe_is_global);
		return;
	}
	lqes = qti_lqes(env);
	qpi_putref(env, lqe2qpi(lqes[index]));
	lqe_putref(lqes[index]);
	memcpy((unsigned char *)lqes + index * lqep_size,
	       (unsigned char *)lqes + (index + 1) * lqep_size,
	       (lqes_cnt - index - 1) * lqep_size);
	qti_lqes_cnt(env)--;
}

void qti_lqes_fini(const struct lu_env *env)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_entry **lqes;
	int i;

	lqes = qti_lqes(env);
	for (i = 0; i < qti->qti_lqes_cnt; i++) {
		qpi_putref(env, lqe2qpi(lqes[i]));
		lqe_putref(lqes[i]);
	}

	if (qti->qti_lqes_num > QMT_MAX_POOL_NUM)
		OBD_FREE(qti->qti_lqes,
			 qti->qti_lqes_num * sizeof(struct lquota_entry *));

	qti->qti_lqes_num = 0;
	qti->qti_lqes_cnt = 0;
}

__u64 qti_lqes_min_qunit(const struct lu_env *env)
{
	__u64 min, qunit;
	int i;

	for (i = 1, min = qti_lqe_qunit(env, 0); i < qti_lqes_cnt(env); i++) {
		qunit = qti_lqe_qunit(env, i);
		/* if qunit is 0, lqe is not enforced and we can ignore it */
		if (qunit && qunit < min)
			min = qunit;
	}

	return min;
}

int qti_lqes_edquot(const struct lu_env *env)
{
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		if (qti_lqes(env)[i]->lqe_edquot)
			return 1;
	}

	return 0;
}

int qti_lqes_restore_init(const struct lu_env *env)
{
	int rc = 0;

	if (qti_lqes_inited(env) && qti_lqes_cnt(env) > QMT_MAX_POOL_NUM) {
		OBD_ALLOC(qmt_info(env)->qti_lqes_rstr,
			  qti_lqes_cnt(env) * sizeof(struct qmt_lqe_restore));
		if (!qmt_info(env)->qti_lqes_rstr)
			rc = -ENOMEM;
	}

	return rc;
}

void qti_lqes_restore_fini(const struct lu_env *env)
{
	if (qti_lqes_inited(env) && qti_lqes_cnt(env) > QMT_MAX_POOL_NUM)
		OBD_FREE(qmt_info(env)->qti_lqes_rstr,
			 qti_lqes_cnt(env) * sizeof(struct qmt_lqe_restore));
}

void qti_lqes_write_lock(const struct lu_env *env)
{
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++)
		lqe_write_lock(qti_lqes(env)[i]);
}

void qti_lqes_write_unlock(const struct lu_env *env)
{
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++)
		lqe_write_unlock(qti_lqes(env)[i]);
}

#define QMT_INIT_SLV_CNT	64
struct lqe_glbl_data *qmt_alloc_lqe_gd(struct qmt_pool_info *pool, int qtype)
{
	struct lqe_glbl_data *lgd;
	struct lqe_glbl_entry *lqeg_arr;
	int slv_cnt, glbe_num;

	OBD_ALLOC(lgd, sizeof(struct lqe_glbl_data));
	if (!lgd)
		RETURN(NULL);

	slv_cnt = qpi_slv_nr_by_rtype(pool, qtype);

	glbe_num = slv_cnt < QMT_INIT_SLV_CNT ? QMT_INIT_SLV_CNT : slv_cnt;
	OBD_ALLOC(lqeg_arr, sizeof(struct lqe_glbl_entry) * glbe_num);
	if (!lqeg_arr) {
		OBD_FREE(lgd, sizeof(struct lqe_glbl_data));
		RETURN(NULL);
	}

	CDEBUG(D_QUOTA, "slv_cnt %d glbe_num %d\n", slv_cnt, glbe_num);

	lgd->lqeg_num_used = slv_cnt;
	lgd->lqeg_num_alloc = glbe_num;
	lgd->lqeg_arr = lqeg_arr;

	RETURN(lgd);
}

void qmt_free_lqe_gd(struct lqe_glbl_data *lgd)
{
	if (unlikely(lgd == NULL))
		return;

	OBD_FREE(lgd->lqeg_arr,
		 sizeof(struct lqe_glbl_entry) * lgd->lqeg_num_alloc);
	OBD_FREE(lgd, sizeof(struct lqe_glbl_data));
}

int qmt_map_lge_idx(struct lqe_glbl_data *lgd, int ostidx)
{
	int k;

	CDEBUG(D_QUOTA, "mapping ostidx %d num_used %d\n", ostidx,
	       lgd->lqeg_num_used);
	/* check common case of sequential OST numbers first */
	if (ostidx < lgd->lqeg_num_used &&
	    lgd->lqeg_arr[ostidx].lge_idx == ostidx)
		return ostidx;

	for (k = 0; k < lgd->lqeg_num_used; k++)
		if (lgd->lqeg_arr[k].lge_idx == ostidx)
			break;

	if (k >= lgd->lqeg_num_used) {
		CERROR("qmt: cannot map ostidx %d, num_used %d: rc = %d\n",
		       ostidx, lgd->lqeg_num_used, -EINVAL);
		return -EINVAL;
	}

	return k;
}

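/*
 * Example (illustrative): with lqeg_arr[].lge_idx = {0, 1, 3, 4} (OST0002
 * absent), qmt_map_lge_idx(lgd, 3) misses the fast path because
 * lqeg_arr[3].lge_idx == 4 != 3, then the linear scan returns 2; an
 * ostidx present in no entry yields -EINVAL.
 */
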
void qmt_seed_glbe_all(const struct lu_env *env, struct lqe_glbl_data *lgd,
		       bool qunit, bool edquot, bool pool_locked)
{
	struct qmt_pool_info *qpi;
	int i, j;
	ENTRY;

	if (!qti_lqes_cnt(env))
		RETURN_EXIT;
	/* lqes array is sorted by qunit - the first entry has minimum qunit.
	 * Thus start seeding global qunit's array beginning from the 1st lqe
	 * and appropriate pool. If pools overlap, slaves in the overlap get
	 * the minimum qunit value.
	 * user1: pool1, pool2, pool_glbl;
	 * pool1: OST1; user1_qunit = 10M;
	 * pool2: OST0, OST1, OST2; user1_qunit = 30M;
	 * pool_glbl: OST0, OST1, OST2, OST3; user1_qunit = 160M;
	 * qunit array after seeding should be:
	 * OST0: 30M; OST1: 10M; OST2: 30M; OST3: 160M; */

	/* the edquot re-setup algorithm works fine
	 * with unsorted lqes */
	if (qunit)
		qti_lqes_sort(env);

	for (i = 0; i < lgd->lqeg_num_used; i++) {
		lgd->lqeg_arr[i].lge_qunit_set = 0;
		lgd->lqeg_arr[i].lge_qunit_nu = 0;
		lgd->lqeg_arr[i].lge_edquot_nu = 0;
	}

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		struct lquota_entry *lqe = qti_lqes(env)[i];
		int slaves_cnt;

		CDEBUG(D_QUOTA, "lqes_cnt %d, i %d\n", qti_lqes_cnt(env), i);
		qpi = lqe2qpi(lqe);
		if (!pool_locked)
			qmt_sarr_read_down(qpi);

		slaves_cnt = qmt_sarr_count(qpi);

		for (j = 0; j < slaves_cnt; j++) {
			int idx, tgt_idx;

			tgt_idx = qmt_sarr_get_idx(qpi, j);
			LASSERT(tgt_idx >= 0);
			idx = qmt_map_lge_idx(lgd, tgt_idx);
			/* ENOENT is fine here - it is possible when
			 * quota_master/dt-0x0 hasn't got index
			 * files for all OSTs yet. At the same time
			 * a quota pool may include all OSTs just from
			 * the configuration even though they haven't
			 * connected yet.
			 */
			if (idx < 0 && !lqe->lqe_is_global)
				continue;
			LASSERTF(idx >= 0, "idx %d lqe_is_global %d lqe %px\n",
				 idx, lqe->lqe_is_global, lqe);

			if (edquot) {
				int lge_edquot, new_edquot, edquot_nu;

				lge_edquot = lgd->lqeg_arr[idx].lge_edquot;
				edquot_nu = lgd->lqeg_arr[idx].lge_edquot_nu;
				new_edquot = lqe->lqe_edquot;

				if (lge_edquot == new_edquot ||
				    (edquot_nu && lge_edquot == 1))
					goto qunit_lbl;
				lgd->lqeg_arr[idx].lge_edquot = new_edquot;
				/* it is needed for the following case:
				 * initial values for idx i -
				 * lqe_edquot = 1, lqe_edquot_nu == 0;
				 * 1: new_edquot == 0 ->
				 *	lqe_edquot = 0, lqe_edquot_nu = 1;
				 * 2: new_edquot == 1 ->
				 *	lqe_edquot = 1, lqe_edquot_nu = 0;
				 * At the 2nd iteration lge_edquot comes back
				 * to 1, so no changes and we don't need
				 * to notify the slave. */
				lgd->lqeg_arr[idx].lge_edquot_nu = !edquot_nu;
			}
qunit_lbl:
			if (qunit) {
				__u64 lge_qunit, new_qunit;

				CDEBUG(D_QUOTA,
				       "tgt_idx %d idx %d lge_qunit_set %d lge_qunit %llu new_qunit %llu\n",
				       tgt_idx, idx,
				       lgd->lqeg_arr[idx].lge_qunit_set,
				       lgd->lqeg_arr[idx].lge_qunit,
				       lqe->lqe_qunit);
				/* lge for this idx is already set
				 * on a previous iteration */
				if (lgd->lqeg_arr[idx].lge_qunit_set)
					continue;
				lge_qunit = lgd->lqeg_arr[idx].lge_qunit;
				new_qunit = lqe->lqe_qunit;
				/* qunit could be not set,
				 * so use global lqe's qunit */
				if (!new_qunit)
					continue;

				if (lge_qunit != new_qunit)
					lgd->lqeg_arr[idx].lge_qunit =
								new_qunit;

				/* TODO: initially slaves notification was done
				 * only for qunit shrinking. Should we always
				 * notify slaves with new qunit ? */
				if (lge_qunit > new_qunit)
					lgd->lqeg_arr[idx].lge_qunit_nu = 1;
				lgd->lqeg_arr[idx].lge_qunit_set = 1;
			}
		}

		if (!pool_locked)
			qmt_sarr_read_up(qpi);
	}
	/* TODO: only for debug purposes - remove it later */
	for (i = 0; i < lgd->lqeg_num_used; i++)
		CDEBUG(D_QUOTA,
		       "lgd i %d tgt_idx %d qunit %lu nu %d; edquot %d nu %d\n",
		       i, lgd->lqeg_arr[i].lge_idx,
		       (unsigned long)lgd->lqeg_arr[i].lge_qunit,
		       lgd->lqeg_arr[i].lge_qunit_nu,
		       lgd->lqeg_arr[i].lge_edquot,
		       lgd->lqeg_arr[i].lge_edquot_nu);

	EXIT;
}

void qmt_setup_lqe_gd(const struct lu_env *env, struct qmt_device *qmt,
		      struct lquota_entry *lqe, struct lqe_glbl_data *lgd,
		      int pool_type)
{
	__u64 qunit;
	bool edquot;
	int i;

	qunit = lqe->lqe_qunit;
	edquot = lqe->lqe_edquot;

	/* Firstly set all elements in the array with
	 * the qunit and edquot of the global pool */
	qmt_sarr_read_down(lqe2qpi(lqe));
	for (i = 0; i < lgd->lqeg_num_used; i++) {
		lgd->lqeg_arr[i].lge_qunit = qunit;
		lgd->lqeg_arr[i].lge_edquot = edquot;
		/* It is the very first lvb setup - qunit and other flags
		 * will be sent to slaves during qmt_lvbo_fill. */
		lgd->lqeg_arr[i].lge_qunit_nu = 0;
		lgd->lqeg_arr[i].lge_edquot_nu = 0;
		lgd->lqeg_arr[i].lge_idx = qmt_sarr_get_idx(lqe2qpi(lqe), i);
	}
	qmt_sarr_read_up(lqe2qpi(lqe));

	qmt_pool_lqes_lookup_spec(env, qmt, pool_type,
				  lqe_qtype(lqe), &lqe->lqe_id);
	qmt_seed_glbe(env, lgd, false);

	mutex_lock(&lqe->lqe_glbl_data_lock);
	if (lqe->lqe_glbl_data == NULL) {
		lqe->lqe_glbl_data = lgd;
		lgd = NULL;
	}
	mutex_unlock(&lqe->lqe_glbl_data_lock);
	if (lgd)
		qmt_free_lqe_gd(lgd);

	qmt_id_lock_notify(qmt, lqe);

	qti_lqes_fini(env);
}