/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2012, 2016, Intel Corporation.
 * Use is subject to license terms.
 *
 * Author: Johann Lombardi <johann.lombardi@intel.com>
 * Author: Niu Yawei <yawei.niu@intel.com>
 */
#define DEBUG_SUBSYSTEM	S_LQUOTA

#include "qmt_internal.h"
/*
 * Initialize qmt-specific fields of quota entry.
 *
 * \param lqe - is the quota entry to initialize
 * \param arg - is the pointer to the qmt_pool_info structure
 */
static void qmt_lqe_init(struct lquota_entry *lqe, void *arg)
{
	LASSERT(lqe_is_master(lqe));

	lqe->lqe_revoke_time = 0;
	init_rwsem(&lqe->lqe_sem);
}
/*
 * Apply the default quota setting to the specified quota entry.
 *
 * \param env           - is the environment passed by the caller
 * \param pool          - is the quota pool of the quota entry
 * \param lqe           - is the lquota_entry object to apply default quota on
 * \param create_record - if true, a global quota record will be created and
 *                        written to disk
 *
 * \retval 0   : success
 * \retval -ve : other appropriate errors
 */
int qmt_lqe_set_default(const struct lu_env *env, struct qmt_pool_info *pool,
			struct lquota_entry *lqe, bool create_record)
{
	struct lquota_entry *lqe_def;
	int rc = 0;

	ENTRY;

	if (lqe->lqe_id.qid_uid == 0)
		RETURN(0);

	lqe_def = pool->qpi_grace_lqe[lqe_qtype(lqe)];

	LQUOTA_DEBUG(lqe, "inherit default quota");

	lqe->lqe_is_default = true;
	lqe->lqe_hardlimit = lqe_def->lqe_hardlimit;
	lqe->lqe_softlimit = lqe_def->lqe_softlimit;

	if (create_record) {
		lqe->lqe_uptodate = true;
		rc = qmt_set_with_lqe(env, pool->qpi_qmt, lqe, 0, 0,
				      LQUOTA_GRACE_FLAG(0, LQUOTA_FLAG_DEFAULT),
				      QIF_TIMES, true, false);
		if (rc != 0)
			LQUOTA_ERROR(lqe, "failed to create the global quota"
				     " record: %d", rc);
	}

	if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0)
		lqe->lqe_enforced = false;
	else
		lqe->lqe_enforced = true;

	RETURN(rc);
}
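/*
 * Note (added for clarity, not from the original source): when
 * create_record is true, the global index record is written with zero
 * hard/soft limits and the LQUOTA_FLAG_DEFAULT bit packed into the time
 * field. qmt_lqe_read() below checks that same flag to re-apply the
 * default limits whenever such an entry is loaded back from disk.
 */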
/*
 * Update a lquota entry. This is done by reading quota settings from the global
 * index. The lquota entry must be write locked.
 *
 * \param env - the environment passed by the caller
 * \param lqe - is the quota entry to refresh
 * \param arg - is the pointer to the qmt_pool_info structure
 * \param find - don't create lqe on disk in case of ENOENT if true
 */
static int qmt_lqe_read(const struct lu_env *env, struct lquota_entry *lqe,
			void *arg, bool find)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;
	int rc;

	ENTRY;

	LASSERT(lqe_is_master(lqe));

	/* read record from disk */
	rc = lquota_disk_read(env, pool->qpi_glb_obj[lqe->lqe_site->lqs_qtype],
			      &lqe->lqe_id, (struct dt_rec *)&qti->qti_glb_rec);

	switch (rc) {
	case -ENOENT:
		if (find)
			RETURN(-ENOENT);
		qmt_lqe_set_default(env, pool, lqe, true);
		break;
	case 0:
		/* copy quota settings from on-disk record */
		lqe->lqe_granted = qti->qti_glb_rec.qbr_granted;
		lqe->lqe_hardlimit = qti->qti_glb_rec.qbr_hardlimit;
		lqe->lqe_softlimit = qti->qti_glb_rec.qbr_softlimit;
		lqe->lqe_gracetime = LQUOTA_GRACE(qti->qti_glb_rec.qbr_time);

		if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0 &&
		    (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) &
							LQUOTA_FLAG_DEFAULT))
			qmt_lqe_set_default(env, pool, lqe, false);
		break;
	default:
		LQUOTA_ERROR(lqe, "failed to read quota entry from disk, rc:%d",
			     rc);
		RETURN(rc);
	}

	if (lqe->lqe_id.qid_uid == 0 ||
	    (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0))
		/* {hard,soft}limit=0 means no quota enforced */
		lqe->lqe_enforced = false;
	else
		lqe->lqe_enforced = true;

	if (qmt_pool_global(pool))
		lqe->lqe_is_global = 1;

	LQUOTA_DEBUG(lqe, "read");

	RETURN(0);
}
/*
 * Print lqe information for debugging.
 *
 * \param lqe - is the quota entry to debug
 * \param arg - is the pointer to the qmt_pool_info structure
 * \param msgdata - debug message
 * \param vaf - format and arguments of the debug message
 */
static void qmt_lqe_debug(struct lquota_entry *lqe, void *arg,
			  struct libcfs_debug_msg_data *msgdata,
			  struct va_format *vaf)
{
	struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;

	libcfs_debug_msg(msgdata,
			 "%pV qmt:%s pool:%s-%s id:%llu enforced:%d hard:%llu soft:%llu granted:%llu time:%llu qunit: %llu edquot:%d may_rel:%llu revoke:%lld default:%s\n",
			 vaf, pool->qpi_qmt->qmt_svname,
			 RES_NAME(pool->qpi_rtype),
			 pool->qpi_name,
			 lqe->lqe_id.qid_uid, lqe->lqe_enforced,
			 lqe->lqe_hardlimit, lqe->lqe_softlimit,
			 lqe->lqe_granted, lqe->lqe_gracetime,
			 lqe->lqe_qunit, lqe->lqe_edquot, lqe->lqe_may_rel,
			 lqe->lqe_revoke_time,
			 lqe->lqe_is_default ? "yes" : "no");
}
/*
 * Vector of quota entry operations supported on the master
 */
struct lquota_entry_operations qmt_lqe_ops = {
	.lqe_init	= qmt_lqe_init,
	.lqe_read	= qmt_lqe_read,
	.lqe_debug	= qmt_lqe_debug,
};
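/*
 * Note (added for clarity, not from the original source): these callbacks
 * are not called directly from this file; they are assumed to be invoked
 * by the generic lquota entry cache when a master-side entry is allocated
 * (lqe_init), faulted in from disk (lqe_read), or printed by the LQUOTA_*
 * debug macros (lqe_debug).
 */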
/*
 * Reserve enough credits to update records in both the global index and
 * the slave index identified by \slv_obj
 *
 * \param env     - is the environment passed by the caller
 * \param lqe     - is the quota entry associated with the identifier
 *                  subject to the change. If it is NULL, the lqes array is
 *                  taken from the env with qti_lqes(env).
 * \param slv_obj - is the dt_object associated with the index file
 * \param sync    - make transaction sync if true
 */
struct thandle *qmt_trans_start_with_slv(const struct lu_env *env,
					 struct lquota_entry *lqe,
					 struct dt_object *slv_obj,
					 bool sync)
{
	struct qmt_device *qmt;
	struct thandle *th;
	struct lquota_entry **lqes;
	struct qmt_lqe_restore *restore;
	int rc = 0, i, lqes_cnt;

	ENTRY;

	restore = qti_lqes_rstr(env);
	if (!lqe) {
		lqes_cnt = qti_lqes_cnt(env);
		lqes = qti_lqes(env);
	} else {
		lqes_cnt = 1;
		lqes = &lqe;
	}

	/* qmt is the same for all lqes, so take it from the 1st */
	qmt = lqe2qpi(lqes[0])->qpi_qmt;

	if (slv_obj != NULL)
		LQUOTA_DEBUG(lqes[0], "declare write for slv "DFID,
			     PFID(lu_object_fid(&slv_obj->do_lu)));

	/* start transaction */
	th = dt_trans_create(env, qmt->qmt_child);
	if (IS_ERR(th))
		RETURN(th);

	if (sync)
		/* quota settings on master are updated synchronously for the
		 * time being */
		th->th_sync = 1;

	/* reserve credits for global index update */
	for (i = 0; i < lqes_cnt; i++) {
		rc = lquota_disk_declare_write(env, th,
					       LQE_GLB_OBJ(lqes[i]),
					       &lqes[i]->lqe_id);
		if (rc)
			GOTO(out, rc);
	}

	if (slv_obj != NULL) {
		/* reserve credits for slave index update */
		rc = lquota_disk_declare_write(env, th, slv_obj, &lqe->lqe_id);
		if (rc)
			GOTO(out, rc);
	}

	/* start transaction */
	rc = dt_trans_start_local(env, qmt->qmt_child, th);
	if (rc)
		GOTO(out, rc);

	EXIT;
out:
	if (rc) {
		dt_trans_stop(env, qmt->qmt_child, th);
		th = ERR_PTR(rc);
		LQUOTA_ERROR(lqe, "failed to slv declare write for "DFID
			     ", rc:%d", PFID(lu_object_fid(&slv_obj->do_lu)),
			     rc);
	} else {
		for (i = 0; i < lqes_cnt; i++) {
			restore[i].qlr_hardlimit = lqes[i]->lqe_hardlimit;
			restore[i].qlr_softlimit = lqes[i]->lqe_softlimit;
			restore[i].qlr_gracetime = lqes[i]->lqe_gracetime;
			restore[i].qlr_granted = lqes[i]->lqe_granted;
			restore[i].qlr_qunit = lqes[i]->lqe_qunit;
		}
	}
	return th;
}
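/*
 * Illustrative caller pattern (a sketch added for clarity, not a verbatim
 * copy of any caller; qmt_restore() below is a hypothetical helper):
 *
 *	th = qmt_trans_start(env, lqe);
 *	if (IS_ERR(th))
 *		RETURN(PTR_ERR(th));
 *	rc = qmt_glb_write(env, th, lqe, LQUOTA_BUMP_VER, &ver);
 *	dt_trans_stop(env, qmt->qmt_child, th);
 *	if (rc)
 *		qmt_restore(lqe, &qti_lqes_rstr(env)[0]);
 *
 * On failure such a caller would put back the settings saved in the
 * restore array before the transaction was started.
 */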
/*
 * Reserve enough credits to update a record in the global index.
 *
 * \param env - is the environment passed by the caller
 * \param lqe - is the quota entry to be modified in the global index
 *
 * Note: the current quota settings are saved in the per-thread restore
 * array so that they can be restored if something goes wrong at index
 * update time.
 */
struct thandle *qmt_trans_start(const struct lu_env *env,
				struct lquota_entry *lqe)
{
	LQUOTA_DEBUG(lqe, "declare write");
	return qmt_trans_start_with_slv(env, lqe, NULL, true);
}
int qmt_glb_write_lqes(const struct lu_env *env, struct thandle *th,
		       __u32 flags, __u64 *ver)
{
	int rc = 0;
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		rc = qmt_glb_write(env, th, qti_lqes(env)[i], flags, ver);
		if (rc)
			break;
	}
	return rc;
}
/*
 * Update record associated with a quota entry in the global index.
 * If LQUOTA_BUMP_VER is set, then the global index version must also be
 * updated.
 * The entry must be at least read locked, dirty and up-to-date.
 *
 * \param env - the environment passed by the caller
 * \param th - is the transaction handle to be used for the disk writes
 * \param lqe - is the quota entry to update
 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
 * \param ver - is used to return the new version of the index.
 *
 * \retval - 0 on success and lqe dirty flag cleared,
 *           appropriate error on failure and uptodate flag cleared.
 */
int qmt_glb_write(const struct lu_env *env, struct thandle *th,
		  struct lquota_entry *lqe, __u32 flags, __u64 *ver)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_glb_rec *rec;
	int rc;

	ENTRY;

	LASSERT(lqe != NULL);
	LASSERT(lqe_is_master(lqe));
	LASSERT(lqe_is_locked(lqe));
	LASSERT(lqe->lqe_uptodate);
	LASSERT((flags & ~(LQUOTA_BUMP_VER | LQUOTA_SET_VER)) == 0);

	LQUOTA_DEBUG(lqe, "write glb");

	/* never delete the entry even when the id isn't enforced and
	 * no quota is granted, otherwise, this entry will not be
	 * synced to the slave during the reintegration. */
	rec = &qti->qti_glb_rec;

	/* fill global index with updated quota settings */
	rec->qbr_granted = lqe->lqe_granted;
	if (lqe->lqe_is_default) {
		rec->qbr_hardlimit = 0;
		rec->qbr_softlimit = 0;
		rec->qbr_time = LQUOTA_GRACE_FLAG(0, LQUOTA_FLAG_DEFAULT);
	} else {
		rec->qbr_hardlimit = lqe->lqe_hardlimit;
		rec->qbr_softlimit = lqe->lqe_softlimit;
		rec->qbr_time = lqe->lqe_gracetime;
	}

	/* write new quota settings */
	rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
			       (struct dt_rec *)rec, flags, ver);
	if (rc != 0)
		/* we failed to write the new quota settings to disk, report
		 * error to caller who will restore the initial value */
		LQUOTA_ERROR(lqe, "failed to update global index, rc:%d", rc);

	RETURN(rc);
}
/*
 * Read from disk how much quota space is allocated to a slave.
 * This is done by reading records from the dedicated slave index file.
 * Return in \granted how much quota space is currently allocated to the
 * slave.
 * The entry must be at least read locked.
 *
 * \param env - the environment passed by the caller
 * \param qid - is the quota id associated with the identifier to look-up
 *              in the slave index
 * \param slv_obj - is the dt_object associated with the slave index
 * \param granted - is the output parameter where to return how much space
 *                  is granted to the slave.
 *
 * \retval - 0 on success, appropriate error on failure
 */
int qmt_slv_read(const struct lu_env *env, union lquota_id *qid,
		 struct dt_object *slv_obj, __u64 *granted)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_slv_rec *slv_rec = &qti->qti_slv_rec;
	int rc;

	ENTRY;

	CDEBUG(D_QUOTA, "read id:%llu from slv "DFID"\n",
	       qid->qid_uid, PFID(lu_object_fid(&slv_obj->do_lu)));

	/* read slave record from disk */
	rc = lquota_disk_read(env, slv_obj, qid,
			      (struct dt_rec *)slv_rec);
	switch (rc) {
	case -ENOENT:
		*granted = 0;
		break;
	case 0:
		/* extract granted from on-disk record */
		*granted = slv_rec->qsr_granted;
		break;
	default:
		CERROR("Failed to read slave record for %llu from "DFID"\n",
		       qid->qid_uid, PFID(lu_object_fid(&slv_obj->do_lu)));
		RETURN(rc);
	}

	CDEBUG(D_QUOTA, "Successful slv read %llu\n", *granted);

	RETURN(0);
}
/*
 * Update record in slave index file.
 * The entry must be at least read locked.
 *
 * \param env - the environment passed by the caller
 * \param th - is the transaction handle to be used for the disk writes
 * \param lqe - is the dirty quota entry which will be updated at the same
 *              time as the slave index
 * \param slv_obj - is the dt_object associated with the slave index
 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
 * \param ver - is used to return the new version of the index.
 * \param granted - is the new amount of quota space owned by the slave
 *
 * \retval - 0 on success, appropriate error on failure
 */
int qmt_slv_write(const struct lu_env *env, struct thandle *th,
		  struct lquota_entry *lqe, struct dt_object *slv_obj,
		  __u32 flags, __u64 *ver, __u64 granted)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_slv_rec *rec;
	int rc;

	ENTRY;

	LASSERT(lqe != NULL);
	LASSERT(lqe_is_master(lqe));
	LASSERT(lqe_is_locked(lqe));

	LQUOTA_DEBUG(lqe, "write slv "DFID" granted:%llu",
		     PFID(lu_object_fid(&slv_obj->do_lu)), granted);

	/* never delete the entry, otherwise, it'll not be transferred
	 * to the slave during reintegration. */
	rec = &qti->qti_slv_rec;

	/* update space granted to this slave */
	rec->qsr_granted = granted;

	/* write new granted space */
	rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
			       (struct dt_rec *)rec, flags, ver);
	if (rc != 0) {
		LQUOTA_ERROR(lqe,
			     "failed to update slave index "DFID" granted:%llu",
			     PFID(lu_object_fid(&slv_obj->do_lu)),
			     granted);
		RETURN(rc);
	}

	RETURN(0);
}
/*
 * Check whether new limits are valid for this pool
 *
 * \param lqe - is the quota entry subject to the setquota
 * \param hard - is the new hard limit
 * \param soft - is the new soft limit
 */
int qmt_validate_limits(struct lquota_entry *lqe, __u64 hard, __u64 soft)
{
	ENTRY;

	if (hard != 0 && soft > hard)
		/* soft limit must be less than hard limit */
		RETURN(-EINVAL);
	RETURN(0);
}
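/*
 * Example (added for clarity, not from the original source): hard=1G with
 * soft=2G is rejected with -EINVAL, while hard=0 (i.e. no hard limit)
 * accepts any soft limit value.
 */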
/*
 * Set/clear edquot flag after quota space allocation/release or settings
 * change. Slaves will be notified of changes via glimpse on per-ID lock.
 *
 * \param lqe - is the quota entry to check
 * \param now - is the current time in seconds used for grace time management
 */
bool qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	ENTRY;

	if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
		RETURN(false);

	if (!lqe->lqe_edquot) {
		/* space exhausted flag not set, let's check whether it is time
		 * to set the flag */

		if (!qmt_space_exhausted(lqe, now))
			/* the qmt still has available space */
			RETURN(false);

		/* See comment in qmt_adjust_qunit(). LU-4139 */
		if (qmt_hard_exhausted(lqe) ||
		    pool->qpi_rtype != LQUOTA_RES_DT) {
			time64_t lapse;

			/* we haven't reached the minimal qunit yet so there is
			 * still hope that the rebalancing process might free
			 * up some quota space */
			if (lqe->lqe_qunit != pool->qpi_least_qunit)
				RETURN(false);

			/* least qunit value not sent to all slaves yet */
			if (lqe->lqe_revoke_time == 0)
				RETURN(false);

			/* Let's give more time to slave to release space */
			lapse = ktime_get_seconds() - QMT_REBA_TIMEOUT;
			if (lqe->lqe_may_rel != 0 && lqe->lqe_revoke_time > lapse)
				RETURN(false);
		} else {
			if (lqe->lqe_qunit > pool->qpi_soft_least_qunit)
				RETURN(false);
		}

		/* set edquot flag */
		lqe->lqe_edquot = true;
	} else {
		/* space exhausted flag set, let's check whether it is time to
		 * clear it */

		if (qmt_space_exhausted(lqe, now))
			/* the qmt still has no space */
			RETURN(false);

		if (lqe->lqe_hardlimit != 0 &&
		    lqe->lqe_granted + pool->qpi_least_qunit >
							lqe->lqe_hardlimit)
			/* we clear the flag only once at least one least qunit
			 * is available */
			RETURN(false);

		/* clear edquot flag */
		lqe->lqe_edquot = false;
	}

	LQUOTA_DEBUG(lqe, "changing edquot flag");

	/* let's notify slave by issuing glimpse on per-ID lock.
	 * the rebalance thread will take care of this */
	RETURN(true);
}
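/*
 * Worked example (illustrative numbers, not from the original source):
 * with lqe_hardlimit = 100M and qpi_least_qunit = 1M, the edquot flag is
 * only cleared once lqe_granted + 1M <= 100M, i.e. once at least one
 * least-qunit worth of space can still be handed out to a slave.
 */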
/* Using least_qunit when over block softlimit will seriously impact the
 * write performance, we need to do some special tweaking on that. */
static __u64 qmt_calc_softlimit(struct lquota_entry *lqe, bool *oversoft)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);

	LASSERT(lqe->lqe_softlimit != 0);
	*oversoft = false;
	/* No need to do special tweaking for inode limit */
	if (pool->qpi_rtype != LQUOTA_RES_DT)
		return lqe->lqe_softlimit;

	if (lqe->lqe_granted <= lqe->lqe_softlimit +
				pool->qpi_soft_least_qunit) {
		return lqe->lqe_softlimit;
	} else if (lqe->lqe_hardlimit != 0) {
		*oversoft = true;
		return lqe->lqe_hardlimit;
	} else {
		*oversoft = true;
		return 0;
	}
}
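/*
 * Worked example (illustrative numbers, not from the original source):
 * with softlimit = 100M and qpi_soft_least_qunit = 4M, a granted count of
 * 103M still returns the 100M soft limit. Once granted exceeds 104M, the
 * hard limit (if set) is returned instead and *oversoft is raised, which
 * is what makes the callers fall back to qpi_soft_least_qunit.
 */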
/*
 * Try to grant more quota space back to slave.
 *
 * \param lqe - is the quota entry for which we would like to allocate more
 *              space
 * \param granted - is how much was already granted as part of the request
 *                  processing
 * \param spare - is how much unused quota space the slave already owns
 *
 * \retval - how much additional space can be granted to the slave
 */
__u64 qmt_alloc_expand(struct lquota_entry *lqe, __u64 granted, __u64 spare)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	__u64 remaining, qunit;
	int slv_cnt;

	ENTRY;

	LASSERT(lqe->lqe_enforced && lqe->lqe_qunit != 0);

	slv_cnt = qpi_slv_nr(lqe2qpi(lqe), lqe_qtype(lqe));
	qunit = lqe->lqe_qunit;

	/* See comment in qmt_adjust_qunit(). LU-4139. */
	if (lqe->lqe_softlimit != 0) {
		bool oversoft;

		remaining = qmt_calc_softlimit(lqe, &oversoft);
		if (remaining == 0)
			remaining = lqe->lqe_granted +
				    pool->qpi_soft_least_qunit;
	} else {
		remaining = lqe->lqe_hardlimit;
	}

	if (lqe->lqe_granted >= remaining)
		RETURN(0);

	remaining -= lqe->lqe_granted;

	do {
		if (spare >= qunit)
			break;

		granted &= (qunit - 1);

		if (remaining > (slv_cnt * qunit) >> 1) {
			/* enough room to grant more space w/o additional
			 * shrinking ... at least for now */
			remaining -= (slv_cnt * qunit) >> 1;
		} else if (qunit != pool->qpi_least_qunit) {
			qunit >>= 2;
			continue;
		}

		granted &= (qunit - 1);
		if (spare > 0)
			RETURN(min_t(__u64, qunit - spare, remaining));
		else
			RETURN(min_t(__u64, qunit - granted, remaining));
	} while (qunit >= pool->qpi_least_qunit);

	RETURN(0);
}
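/*
 * Worked example (illustrative numbers, not from the original source):
 * with qunit = 8M, spare = 0 and granted = 21M, only the 5M remainder of
 * the grant modulo qunit matters (granted & (qunit - 1)), so up to
 * 8M - 5M = 3M more can be handed out, capped by the remaining quota
 * space computed above.
 */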
/*
 * Adjust qunit size according to quota limits and total granted count.
 * The caller must have locked the lqe.
 *
 * \param env - the environment passed by the caller
 * \param lqe - is the qid entry to be adjusted
 * \retval true - need to reseed the glbe array
 */
bool qmt_adjust_qunit(const struct lu_env *env, struct lquota_entry *lqe)
{
	struct qmt_pool_info *pool = lqe2qpi(lqe);
	bool need_reseed = false;
	int slv_cnt;
	__u64 qunit, limit, qunit2 = 0;

	ENTRY;

	LASSERT(lqe_is_locked(lqe));

	if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
		/* no quota limits */
		RETURN(need_reseed);

	/* record how many slaves have already registered */
	slv_cnt = qpi_slv_nr(pool, lqe_qtype(lqe));
	if (slv_cnt == 0) {
		/* The pool has no slaves anymore. Qunit will be adjusted
		 * again when new slaves are added. */
		if (lqe->lqe_qunit) {
			need_reseed = true;
			lqe->lqe_qunit = 0;
		}
		/* wait for at least one slave to join */
		RETURN(need_reseed);
	}

	/* Qunit calculation is based on soft limit, if any, hard limit
	 * otherwise. This means that qunit is shrunk to the minimum when
	 * beyond the soft limit. This will impact performance, but that's the
	 * price of an accurate grace time management. */
	if (lqe->lqe_softlimit != 0) {
		bool oversoft = false;

		/* As a compromise of write performance and the grace time
		 * accuracy, the block qunit size will be shrunk to
		 * qpi_soft_least_qunit when over softlimit. LU-4139. */
		limit = qmt_calc_softlimit(lqe, &oversoft);
		if (oversoft)
			qunit2 = pool->qpi_soft_least_qunit;
		if (limit == 0)
			GOTO(done, qunit = qunit2);
	} else if (lqe->lqe_hardlimit != 0) {
		limit = lqe->lqe_hardlimit;
	} else {
		LQUOTA_ERROR(lqe, "enforced bit set, but neither hard nor soft "
			     "limit is set");
		RETURN(need_reseed);
	}

	qunit = lqe->lqe_qunit == 0 ? pool->qpi_least_qunit : lqe->lqe_qunit;
	/* The qunit value is computed as follows: limit / (2 * slv_cnt).
	 * Then 75% of the quota space can be granted with current qunit value.
	 * The remaining 25% are then used with reduced qunit size (by a factor
	 * of 4) which is then divided in a similar manner.
	 *
	 * |---------------------limit---------------------|
	 * |-------limit / 2-------|-limit / 4-|-limit / 4-|
	 * |qunit|qunit|qunit|qunit|           |           |
	 * |----slv_cnt * qunit----|           |           |
	 * |-grow limit-|          |           |           |
	 * |--------------shrink limit---------|           |
	 * |---space granted in qunit chunks---|-remaining-|
	 *
	 * The remaining 25% is then granted with a qunit shrunk by a factor
	 * of 4, which is in turn split the same way:
	 *
	 * qunit >>= 2; |qunit*slv_cnt|qunit*slv_cnt|
	 *               |---space in qunit---|remain|
	 */
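	/*
	 * Worked example (illustrative numbers, not from the original
	 * source): limit = 160M, slv_cnt = 4 gives an initial qunit of
	 * 160M / (2 * 4) = 20M. Grants proceed in 20M chunks until
	 * granted + (4 * 20M) / 2 would exceed the limit, i.e. until 120M
	 * (75%) is granted. qunit is then shrunk by 4 to 5M and the last
	 * 40M is handed out in 5M chunks, and so on down to
	 * qpi_least_qunit.
	 */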
	if (qunit == pool->qpi_least_qunit ||
	    limit >= lqe->lqe_granted + ((slv_cnt * qunit) >> 1)) {
		/* current qunit value still fits, let's see if we can afford to
		 * increase qunit now ...
		 * To increase qunit again, we have to be under 25% */
		while (qunit && limit >= lqe->lqe_granted + 6 * qunit * slv_cnt)
			qunit <<= 2;

		if (!qunit) {
			qunit = limit;
			do_div(qunit, 2 * slv_cnt);
		}
	} else {
		/* shrink qunit until we find a suitable value */
		while (qunit > pool->qpi_least_qunit &&
		       limit < lqe->lqe_granted + ((slv_cnt * qunit) >> 1))
			qunit >>= 2;
	}

done:
	if (qunit2 && qunit > qunit2)
		qunit = qunit2;

	if (lqe->lqe_qunit == qunit)
		/* keep current qunit */
		RETURN(need_reseed);

	LQUOTA_DEBUG(lqe, "%s qunit to %llu",
		     lqe->lqe_qunit < qunit ? "increasing" : "decreasing",
		     qunit);

	/* store new qunit value */
	swap(lqe->lqe_qunit, qunit);

	/* reseed glbe array and notify
	 * slave if qunit was shrunk */
	need_reseed = true;
	/* reset revoke time */
	lqe->lqe_revoke_time = 0;

	if (lqe->lqe_qunit >= qunit &&
	    (lqe->lqe_qunit == pool->qpi_least_qunit)) {
		/* initial qunit value is the smallest one */
		lqe->lqe_revoke_time = ktime_get_seconds();
	}
	RETURN(need_reseed);
}
bool qmt_adjust_edquot_qunit_notify(const struct lu_env *env,
				    struct qmt_device *qmt,
				    __u64 now, bool edquot,
				    bool qunit, __u32 qb_flags)
{
	struct lquota_entry *lqe_gl, *lqe;
	bool need_reseed = false;
	int i;

	lqe_gl = qti_lqes_glbl(env);

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		lqe = qti_lqes(env)[i];
		if (qunit)
			need_reseed |= qmt_adjust_qunit(env, lqe);
		if (edquot)
			need_reseed |= qmt_adjust_edquot(lqe, now);
	}

	if (!lqe_gl->lqe_glbl_data &&
	    (req_has_rep(qb_flags) || req_is_rel(qb_flags))) {
		if (need_reseed)
			CWARN("%s: can't notify - lqe_glbl_data is not set\n",
			      qmt->qmt_svname);
		return need_reseed;
	}

	if (lqe_gl->lqe_glbl_data && need_reseed) {
		qmt_seed_glbe_all(env, lqe_gl->lqe_glbl_data, qunit, edquot);
		qmt_id_lock_notify(qmt, lqe_gl);
	}
	return need_reseed;
}
/*
 * Adjust qunit & edquot flag in case it wasn't initialized already (e.g.
 * limit set while no slaves were connected yet)
 */
bool qmt_revalidate(const struct lu_env *env, struct lquota_entry *lqe)
{
	bool need_notify = false;

	if (lqe->lqe_qunit == 0) {
		/* lqe was read from disk, but neither qunit, nor edquot flag
		 * were initialized */
		need_notify = qmt_adjust_qunit(env, lqe);
		if (lqe->lqe_qunit != 0)
			need_notify |= qmt_adjust_edquot(lqe,
						ktime_get_real_seconds());
	}

	return need_notify;
}
void qmt_revalidate_lqes(const struct lu_env *env,
			 struct qmt_device *qmt, __u32 qb_flags)
{
	struct lquota_entry *lqe_gl = qti_lqes_glbl(env);
	bool need_notify = false;
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++)
		need_notify |= qmt_revalidate(env, qti_lqes(env)[i]);

	/* There could be no ID lock to the moment of reconciliation.
	 * As a result lqe global data is not initialised yet. It is ok
	 * for release and report requests. */
	if (!lqe_gl->lqe_glbl_data &&
	    (req_is_rel(qb_flags) || req_has_rep(qb_flags)))
		return;

	if (need_notify) {
		qmt_seed_glbe(env, lqe_gl->lqe_glbl_data);
		qmt_id_lock_notify(qmt, lqe_gl);
	}
}
void qti_lqes_init(const struct lu_env *env)
{
	struct qmt_thread_info *qti = qmt_info(env);

	qti->qti_lqes_cnt = 0;
	qti->qti_glbl_lqe_idx = 0;
	qti->qti_lqes_num = QMT_MAX_POOL_NUM;
}
int qti_lqes_add(const struct lu_env *env, struct lquota_entry *lqe)
{
	struct qmt_thread_info *qti = qmt_info(env);

	if (qti->qti_lqes_cnt >= qti->qti_lqes_num) {
		struct lquota_entry **lqes;

		OBD_ALLOC(lqes, sizeof(lqe) * qti->qti_lqes_num * 2);
		if (!lqes)
			return -ENOMEM;
		memcpy(lqes, qti_lqes(env), qti->qti_lqes_cnt * sizeof(lqe));
		/* Don't need to free, if it is the very 1st allocation */
		if (qti->qti_lqes_num > QMT_MAX_POOL_NUM)
			OBD_FREE(qti->qti_lqes,
				 qti->qti_lqes_num * sizeof(lqe));
		qti->qti_lqes = lqes;
		qti->qti_lqes_num *= 2;
	}

	if (lqe->lqe_is_global)
		qti->qti_glbl_lqe_idx = qti->qti_lqes_cnt;
	qti_lqes(env)[qti->qti_lqes_cnt++] = lqe;

	/* The pool could be accessed directly from lqe, so take
	 * extra reference that is put in qti_lqes_fini */
	qpi_getref(lqe2qpi(lqe));

	CDEBUG(D_QUOTA, "LQE %p %lu is added, lqe_cnt %d lqes_num %d\n",
	       lqe, (unsigned long)lqe->lqe_id.qid_uid,
	       qti->qti_lqes_cnt, qti->qti_lqes_num);
	LASSERT(qti->qti_lqes_num != 0);

	return 0;
}
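/*
 * Note (added for clarity, not from the original source): the per-thread
 * lqes array starts as a small QMT_MAX_POOL_NUM-entry array set up in
 * qti_lqes_init() and is only moved to a heap allocation, doubling in
 * size each time it fills up, once a quota id belongs to more pools
 * than that.
 */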
void qti_lqes_del(const struct lu_env *env, int index)
{
	struct lquota_entry **lqes;
	int lqes_cnt = qti_lqes_cnt(env);
	int lqep_size = sizeof(struct lquota_entry *);

	if (index == 0) {
		/* We can't handle non global lqes correctly without
		 * global lqe located at index 0. If we try to do so,
		 * something goes wrong. */
		LQUOTA_ERROR(qti_lqes_glbl(env),
			     "quota: cannot remove lqe at index 0 as it is global");
		LASSERT(qti_lqes_glbl(env)->lqe_is_global);
		return;
	}
	lqes = qti_lqes(env);
	qpi_putref(env, lqe2qpi(lqes[index]));
	lqe_putref(lqes[index]);
	/* the source and destination regions overlap, so use memmove() */
	memmove((unsigned char *)lqes + index * lqep_size,
		(unsigned char *)lqes + (index + 1) * lqep_size,
		(lqes_cnt - index - 1) * lqep_size);
	qti_lqes_cnt(env)--;
}
void qti_lqes_fini(const struct lu_env *env)
{
	struct qmt_thread_info *qti = qmt_info(env);
	struct lquota_entry **lqes;
	int i;

	lqes = qti_lqes(env);
	for (i = 0; i < qti->qti_lqes_cnt; i++) {
		qpi_putref(env, lqe2qpi(lqes[i]));
		lqe_putref(lqes[i]);
	}

	if (qti->qti_lqes_num > QMT_MAX_POOL_NUM)
		OBD_FREE(qti->qti_lqes,
			 qti->qti_lqes_num * sizeof(struct lquota_entry *));
}
inline int qti_lqes_min_qunit(const struct lu_env *env)
{
	int i, min, qunit;

	for (i = 1, min = qti_lqe_qunit(env, 0); i < qti_lqes_cnt(env); i++) {
		qunit = qti_lqe_qunit(env, i);
		/* if qunit is 0, the pool is not limited for this ID */
		if (qunit && qunit < min)
			min = qunit;
	}

	return min;
}
inline int qti_lqes_edquot(const struct lu_env *env)
{
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		if (qti_lqes(env)[i]->lqe_edquot)
			return 1;
	}

	return 0;
}
inline int qti_lqes_restore_init(const struct lu_env *env)
{
	int rc = 0;

	if (qti_lqes_cnt(env) > QMT_MAX_POOL_NUM) {
		OBD_ALLOC(qmt_info(env)->qti_lqes_rstr,
			  qti_lqes_cnt(env) * sizeof(struct qmt_lqe_restore));
		if (!qti_lqes_rstr(env))
			rc = -ENOMEM;
	}

	return rc;
}
inline void qti_lqes_restore_fini(const struct lu_env *env)
{
	if (qti_lqes_cnt(env) > QMT_MAX_POOL_NUM)
		OBD_FREE(qmt_info(env)->qti_lqes_rstr,
			 qti_lqes_cnt(env) * sizeof(struct qmt_lqe_restore));
}
inline void qti_lqes_write_lock(const struct lu_env *env)
{
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++)
		lqe_write_lock(qti_lqes(env)[i]);
}

inline void qti_lqes_write_unlock(const struct lu_env *env)
{
	int i;

	for (i = 0; i < qti_lqes_cnt(env); i++)
		lqe_write_unlock(qti_lqes(env)[i]);
}
#define QMT_INIT_SLV_CNT	64
struct lqe_glbl_data *qmt_alloc_lqe_gd(struct qmt_pool_info *pool, int qtype)
{
	struct lqe_glbl_data *lgd;
	struct lqe_glbl_entry *lqeg_arr;
	int slv_cnt, glbe_num;

	OBD_ALLOC(lgd, sizeof(struct lqe_glbl_data));
	if (!lgd)
		RETURN(NULL);

	slv_cnt = qpi_slv_nr_by_rtype(pool, qtype);

	glbe_num = slv_cnt < QMT_INIT_SLV_CNT ? QMT_INIT_SLV_CNT : slv_cnt;
	OBD_ALLOC(lqeg_arr, sizeof(struct lqe_glbl_entry) * glbe_num);
	if (!lqeg_arr) {
		OBD_FREE(lgd, sizeof(struct lqe_glbl_data));
		RETURN(NULL);
	}

	CDEBUG(D_QUOTA, "slv_cnt %d glbe_num %d\n", slv_cnt, glbe_num);

	lgd->lqeg_num_used = slv_cnt;
	lgd->lqeg_num_alloc = glbe_num;
	lgd->lqeg_arr = lqeg_arr;

	RETURN(lgd);
}

void qmt_free_lqe_gd(struct lqe_glbl_data *lgd)
{
	OBD_FREE(lgd->lqeg_arr,
		 sizeof(struct lqe_glbl_entry) * lgd->lqeg_num_alloc);
	OBD_FREE(lgd, sizeof(struct lqe_glbl_data));
}
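/*
 * Sizing note (added for clarity, not from the original source): the glbe
 * array is padded up to QMT_INIT_SLV_CNT entries, so a pool with 10
 * slaves still allocates 64 entries, while a pool with 100 slaves
 * allocates exactly 100. The padding leaves room for slaves that
 * register later without an immediate reallocation.
 */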
void qmt_seed_glbe_all(const struct lu_env *env, struct lqe_glbl_data *lgd,
		       bool qunit, bool edquot)
{
	struct rw_semaphore *sem = NULL;
	struct qmt_pool_info *qpi;
	int i, j, idx;
	ENTRY;

	/* lqes array is sorted by qunit - the first entry has minimum qunit.
	 * Thus start seeding global qunit's array beginning from the 1st lqe
	 * and appropriate pool. If pools overlapped, slaves from this
	 * overlapping get minimum qunit value.
	 * user1: pool1, pool2, pool_glbl;
	 * pool1: OST1; user1_qunit = 10M;
	 * pool2: OST0, OST1, OST2; user1_qunit = 30M;
	 * pool_glbl: OST0, OST1, OST2, OST3; user1_qunit = 160M;
	 * qunit array after seeding should be:
	 * OST0: 30M; OST1: 10M; OST2: 30M; OST3: 160M; */

	/* edquot resetup algorithm works fine
	 * with not sorted lqes */
	if (qunit)
		qti_lqes_sort(env);

	for (i = 0; i < lgd->lqeg_num_used; i++) {
		lgd->lqeg_arr[i].lge_qunit_set = 0;
		lgd->lqeg_arr[i].lge_qunit_nu = 0;
		lgd->lqeg_arr[i].lge_edquot_nu = 0;
	}

	for (i = 0; i < qti_lqes_cnt(env); i++) {
		struct lquota_entry *lqe = qti_lqes(env)[i];
		int slaves_cnt;

		CDEBUG(D_QUOTA, "lqes_cnt %d, i %d\n", qti_lqes_cnt(env), i);
		qpi = lqe2qpi(lqe);
		if (qmt_pool_global(qpi)) {
			slaves_cnt = qpi_slv_nr_by_rtype(lqe2qpi(lqe),
							 lqe_qtype(lqe));
		} else {
			sem = qmt_sarr_rwsem(qpi);
			down_read(sem);
			slaves_cnt = qmt_sarr_count(qpi);
		}

		for (j = 0; j < slaves_cnt; j++) {
			idx = qmt_sarr_get_idx(qpi, j);

			if (edquot) {
				int lge_edquot, new_edquot, edquot_nu;

				lge_edquot = lgd->lqeg_arr[idx].lge_edquot;
				edquot_nu = lgd->lqeg_arr[idx].lge_edquot_nu;
				new_edquot = lqe->lqe_edquot;

				if (lge_edquot == new_edquot ||
				    (edquot_nu && lge_edquot == 1))
					goto qunit_lbl;
				lgd->lqeg_arr[idx].lge_edquot = new_edquot;
				/* it is needed for the following case:
				 * initial values for idx i -
				 * lqe_edquot = 1, lqe_edquot_nu == 0;
				 * 1: new_edquot == 0 ->
				 *	lqe_edquot = 0, lqe_edquot_nu = 1;
				 * 2: new_edquot == 1 ->
				 *	lqe_edquot = 1, lqe_edquot_nu = 0;
				 * At the 2nd iteration lge_edquot comes back
				 * to 1, so no changes and we don't need
				 * to notify slave. */
				lgd->lqeg_arr[idx].lge_edquot_nu = !edquot_nu;
			}
qunit_lbl:
			if (qunit) {
				__u64 lge_qunit, new_qunit;

				CDEBUG(D_QUOTA,
				       "idx %d lge_qunit_set %d lge_qunit %llu new_qunit %llu\n",
				       idx, lgd->lqeg_arr[idx].lge_qunit_set,
				       lgd->lqeg_arr[idx].lge_qunit,
				       lqe->lqe_qunit);
				/* lge for this idx is already set
				 * on previous iteration */
				if (lgd->lqeg_arr[idx].lge_qunit_set)
					continue;
				lge_qunit = lgd->lqeg_arr[idx].lge_qunit;
				new_qunit = lqe->lqe_qunit;
				/* qunit could be not set,
				 * so use global lqe's qunit */
				if (!new_qunit)
					continue;

				if (lge_qunit != new_qunit)
					lgd->lqeg_arr[idx].lge_qunit =
								new_qunit;

				/* TODO: initially slaves notification was done
				 * only for qunit shrinking. Should we always
				 * notify slaves with new qunit ? */
				if (lge_qunit > new_qunit)
					lgd->lqeg_arr[idx].lge_qunit_nu = 1;
				lgd->lqeg_arr[idx].lge_qunit_set = 1;
			}
		}

		if (!qmt_pool_global(qpi))
			up_read(sem);
	}
	/* TODO: only for debug purposes - remove it later */
	for (i = 0; i < lgd->lqeg_num_used; i++)
		CDEBUG(D_QUOTA,
		       "lgd ost %d, qunit %lu nu %d; edquot %d nu %d\n",
		       i, (unsigned long)lgd->lqeg_arr[i].lge_qunit,
		       lgd->lqeg_arr[i].lge_qunit_nu,
		       lgd->lqeg_arr[i].lge_edquot,
		       lgd->lqeg_arr[i].lge_edquot_nu);

	EXIT;
}
void qmt_setup_lqe_gd(const struct lu_env *env, struct qmt_device *qmt,
		      struct lquota_entry *lqe, struct lqe_glbl_data *lgd,
		      int pool_type)
{
	__u64 qunit;
	bool edquot;
	int i;

	qunit = lqe->lqe_qunit;
	edquot = lqe->lqe_edquot;

	/* Firstly set all elements in array with
	 * qunit and edquot of global pool */
	for (i = 0; i < lgd->lqeg_num_used; i++) {
		lgd->lqeg_arr[i].lge_qunit = qunit;
		lgd->lqeg_arr[i].lge_edquot = edquot;
		/* It is the very first lvb setup - qunit and other flags
		 * will be sent to slaves during qmt_lvbo_fill. */
		lgd->lqeg_arr[i].lge_qunit_nu = 0;
		lgd->lqeg_arr[i].lge_edquot_nu = 0;
	}

	qmt_pool_lqes_lookup_spec(env, qmt, pool_type,
				  lqe_qtype(lqe), &lqe->lqe_id);
	qmt_seed_glbe(env, lgd);

	lqe->lqe_glbl_data = lgd;
	qmt_id_lock_notify(qmt, lqe);

	qti_lqes_fini(env);
}