4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2016, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
31 #define DEBUG_SUBSYSTEM S_LQUOTA
33 #include "qmt_internal.h"
36 * Initialize qmt-specific fields of quota entry.
38 * \param lqe - is the quota entry to initialize
39 * \param arg - is the pointer to the qmt_pool_info structure
41 static void qmt_lqe_init(struct lquota_entry *lqe, void *arg)
/* this callback is only meant for master-side entries */
43 LASSERT(lqe_is_master(lqe));
/* no revoke time recorded yet and a freshly initialized rw-semaphore;
 * arg (the pool) is not referenced by the visible statements */
45 lqe->lqe_revoke_time = 0;
46 init_rwsem(&lqe->lqe_sem);
49 /* Apply the default quota setting to the specified quota entry
51 * \param env - is the environment passed by the caller
52 * \param pool - is the quota pool of the quota entry
53 * \param lqe - is the lquota_entry object to apply default quota on
54 * \param create_record - if true, a global quota record will be created and
58 * \retval -ve : other appropriate errors
60 int qmt_lqe_set_default(const struct lu_env *env, struct qmt_pool_info *pool,
61 struct lquota_entry *lqe, bool create_record)
63 struct lquota_entry *lqe_def;
/* id 0 is special-cased; the statement guarded by this test is not visible
 * here (presumably an early return - TODO confirm) */
68 if (lqe->lqe_id.qid_uid == 0)
/* the pool keeps, per quota type, an entry whose limits serve as defaults */
71 lqe_def = pool->qpi_grace_lqe[lqe->lqe_site->lqs_qtype];
73 LQUOTA_DEBUG(lqe, "inherit default quota");
/* flag the entry as default and copy both limits from the default entry */
75 lqe->lqe_is_default = true;
76 lqe->lqe_hardlimit = lqe_def->lqe_hardlimit;
77 lqe->lqe_softlimit = lqe_def->lqe_softlimit;
/* NOTE(review): presumably only executed when create_record is set (the
 * guard is not visible here); a grace value carrying LQUOTA_FLAG_DEFAULT is
 * handed to qmt_set_with_lqe() */
80 lqe->lqe_uptodate = true;
81 rc = qmt_set_with_lqe(env, pool->qpi_qmt, lqe, 0, 0,
82 LQUOTA_GRACE_FLAG(0, LQUOTA_FLAG_DEFAULT),
83 QIF_TIMES, true, false);
86 LQUOTA_ERROR(lqe, "failed to create the global quota"
/* the entry is enforced only if at least one inherited limit is non-zero */
90 if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0)
91 lqe->lqe_enforced = false;
93 lqe->lqe_enforced = true;
99 * Update a lquota entry. This is done by reading quota settings from the global
100 * index. The lquota entry must be write locked.
102 * \param env - the environment passed by the caller
103 * \param lqe - is the quota entry to refresh
104 * \param arg - is the pointer to the qmt_pool_info structure
106 static int qmt_lqe_read(const struct lu_env *env, struct lquota_entry *lqe,
109 struct qmt_thread_info *qti = qmt_info(env);
110 struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;
114 LASSERT(lqe_is_master(lqe));
116 /* read this id's record from the global index object of its quota type */
117 rc = lquota_disk_read(env, pool->qpi_glb_obj[lqe->lqe_site->lqs_qtype],
118 &lqe->lqe_id, (struct dt_rec *)&qti->qti_glb_rec);
/* NOTE(review): the dispatch on rc is not visible here; this call presumably
 * handles the "no record on disk" case by applying the default quota and
 * asking for a record to be created (create_record = true) */
122 qmt_lqe_set_default(env, pool, lqe, true);
125 /* copy quota settings from on-disk record */
126 lqe->lqe_granted = qti->qti_glb_rec.qbr_granted;
127 lqe->lqe_hardlimit = qti->qti_glb_rec.qbr_hardlimit;
128 lqe->lqe_softlimit = qti->qti_glb_rec.qbr_softlimit;
129 lqe->lqe_gracetime = LQUOTA_GRACE(qti->qti_glb_rec.qbr_time);
/* a record with both limits zero and LQUOTA_FLAG_DEFAULT set in qbr_time
 * inherits the default limits; no record creation is requested */
131 if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0 &&
132 (LQUOTA_FLAG(qti->qti_glb_rec.qbr_time) &
133 LQUOTA_FLAG_DEFAULT))
134 qmt_lqe_set_default(env, pool, lqe, false);
137 LQUOTA_ERROR(lqe, "failed to read quota entry from disk, rc:%d",
142 if (lqe->lqe_id.qid_uid == 0 ||
143 (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0))
144 /* id 0, or both limits set to 0, means no quota is enforced */
145 lqe->lqe_enforced = false;
147 lqe->lqe_enforced = true;
149 LQUOTA_DEBUG(lqe, "read");
154 * Print lqe information for debugging.
156 * \param lqe - is the quota entry to debug
157 * \param arg - is the pointer to the qmt_pool_info structure
158 * \param msgdata - debug message
159 * \param fmt - format of debug message
161 static void qmt_lqe_debug(struct lquota_entry *lqe, void *arg,
162 struct libcfs_debug_msg_data *msgdata,
163 const char *fmt, va_list args)
165 struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;
/* Append the master-side state of the entry to the caller's message.
 * The low 16 bits of qpi_key are printed as the pool number and the
 * remaining upper bits are passed to RES_NAME() for the resource name. */
167 libcfs_debug_vmsg2(msgdata, fmt, args,
168 "qmt:%s pool:%d-%s id:%llu enforced:%d hard:%llu"
169 " soft:%llu granted:%llu time:%llu qunit: %llu"
170 " edquot:%d may_rel:%llu revoke:%lld default:%s\n",
171 pool->qpi_qmt->qmt_svname,
172 pool->qpi_key & 0x0000ffff,
173 RES_NAME(pool->qpi_key >> 16),
174 lqe->lqe_id.qid_uid, lqe->lqe_enforced,
175 lqe->lqe_hardlimit, lqe->lqe_softlimit,
176 lqe->lqe_granted, lqe->lqe_gracetime,
177 lqe->lqe_qunit, lqe->lqe_edquot, lqe->lqe_may_rel,
178 lqe->lqe_revoke_time,
179 lqe->lqe_is_default ? "yes" : "no");
183 * Vector of quota entry operations supported on the master
185 struct lquota_entry_operations qmt_lqe_ops = {
/* init, read and debug handlers are the static functions defined in this
 * file */
186 .lqe_init = qmt_lqe_init,
187 .lqe_read = qmt_lqe_read,
188 .lqe_debug = qmt_lqe_debug,
192 * Reserve enough credits to update records in both the global index and
193 * the slave index identified by \slv_obj
195 * \param env - is the environment passed by the caller
196 * \param lqe - is the quota entry associated with the identifier
197 * subject to the change
198 * \param slv_obj - is the dt_object associated with the index file
199 * \param restore - is a temporary storage for current quota settings which will
200 * be restored if something goes wrong at index update time.
202 struct thandle *qmt_trans_start_with_slv(const struct lu_env *env,
203 struct lquota_entry *lqe,
204 struct dt_object *slv_obj,
205 struct qmt_lqe_restore *restore)
207 struct qmt_device *qmt;
212 LASSERT(lqe != NULL);
213 LASSERT(lqe_is_master(lqe));
215 qmt = lqe2qpi(lqe)->qpi_qmt;
/* NOTE(review): slv_obj may be NULL (qmt_trans_start() passes NULL and it is
 * NULL-checked further down) - confirm that a guard on a line not shown here
 * protects this dereference */
218 LQUOTA_DEBUG(lqe, "declare write for slv "DFID,
219 PFID(lu_object_fid(&slv_obj->do_lu)));
221 /* create the transaction handle on the child device */
222 th = dt_trans_create(env, qmt->qmt_child);
227 /* quota settings on master are updated synchronously for the
231 /* reserve credits for global index update */
232 rc = lquota_disk_declare_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id);
236 if (slv_obj != NULL) {
237 /* reserve credits for slave index update */
238 rc = lquota_disk_declare_write(env, th, slv_obj, &lqe->lqe_id);
243 /* start transaction */
244 rc = dt_trans_start_local(env, qmt->qmt_child, th);
/* failure path: the transaction is stopped before the error is reported */
251 dt_trans_stop(env, qmt->qmt_child, th);
/* NOTE(review): this message dereferences slv_obj as well; verify it cannot
 * be reached with slv_obj == NULL */
253 LQUOTA_ERROR(lqe, "failed to slv declare write for "DFID
254 ", rc:%d", PFID(lu_object_fid(&slv_obj->do_lu)),
/* snapshot the current settings into restore so that they can be put back if
 * the index update fails later (presumably the success branch - the
 * surrounding if/else lines are not visible) */
257 restore->qlr_hardlimit = lqe->lqe_hardlimit;
258 restore->qlr_softlimit = lqe->lqe_softlimit;
259 restore->qlr_gracetime = lqe->lqe_gracetime;
260 restore->qlr_granted = lqe->lqe_granted;
261 restore->qlr_qunit = lqe->lqe_qunit;
267 * Reserve enough credits to update a record in the global index
269 * \param env - is the environment passed by the caller
270 * \param lqe - is the quota entry to be modified in the global index
271 * \param restore - is a temporary storage for current quota settings which will
272 * be restored if something goes wrong at index update time.
274 struct thandle *qmt_trans_start(const struct lu_env *env,
275 struct lquota_entry *lqe,
276 struct qmt_lqe_restore *restore)
278 LQUOTA_DEBUG(lqe, "declare write");
/* same as qmt_trans_start_with_slv() with no slave index object */
279 return qmt_trans_start_with_slv(env, lqe, NULL, restore);
283 * Update record associated with a quota entry in the global index.
284 * If LQUOTA_BUMP_VER is set, then the global index version must also be
286 * The entry must be at least read locked, dirty and up-to-date.
288 * \param env - the environment passed by the caller
289 * \param th - is the transaction handle to be used for the disk writes
290 * \param lqe - is the quota entry to update
291 * \param obj - is the dt_object associated with the index file
292 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
293 * \param ver - is used to return the new version of the index.
295 * \retval - 0 on success and lqe dirty flag cleared,
296 * appropriate error on failure and uptodate flag cleared.
298 int qmt_glb_write(const struct lu_env *env, struct thandle *th,
299 struct lquota_entry *lqe, __u32 flags, __u64 *ver)
301 struct qmt_thread_info *qti = qmt_info(env);
302 struct lquota_glb_rec *rec;
306 LASSERT(lqe != NULL);
307 LASSERT(lqe_is_master(lqe));
308 LASSERT(lqe_is_locked(lqe));
309 LASSERT(lqe->lqe_uptodate);
/* only the two version-related flags are accepted */
310 LASSERT((flags & ~(LQUOTA_BUMP_VER | LQUOTA_SET_VER)) == 0);
312 LQUOTA_DEBUG(lqe, "write glb");
314 /* never delete the entry even when the id isn't enforced and
315 * no quota is granted, otherwise, this entry will not be
316 * synced to slave during the reintegration. */
317 rec = &qti->qti_glb_rec;
319 /* fill global index with updated quota settings */
320 rec->qbr_granted = lqe->lqe_granted;
/* an entry that uses the default quota is stored with zero limits and the
 * LQUOTA_FLAG_DEFAULT flag rather than with the inherited values */
321 if (lqe->lqe_is_default) {
322 rec->qbr_hardlimit = 0;
323 rec->qbr_softlimit = 0;
324 rec->qbr_time = LQUOTA_GRACE_FLAG(0, LQUOTA_FLAG_DEFAULT);
326 rec->qbr_hardlimit = lqe->lqe_hardlimit;
327 rec->qbr_softlimit = lqe->lqe_softlimit;
328 rec->qbr_time = lqe->lqe_gracetime;
331 /* write new quota settings */
332 rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
333 (struct dt_rec *)rec, flags, ver);
335 /* we failed to write the new quota settings to disk, report
336 * error to caller who will restore the initial value */
337 LQUOTA_ERROR(lqe, "failed to update global index, rc:%d", rc);
343 * Read from disk how much quota space is allocated to a slave.
344 * This is done by reading records from the dedicated slave index file.
345 * Return in \granted how much quota space is currently allocated to the
347 * The entry must be at least read locked.
349 * \param env - the environment passed by the caller
350 * \param lqe - is the quota entry associated with the identifier to look-up
352 * \param slv_obj - is the dt_object associated with the slave index
353 * \param granted - is the output parameter where to return how much space
354 * is granted to the slave.
356 * \retval - 0 on success, appropriate error on failure
358 int qmt_slv_read(const struct lu_env *env, struct lquota_entry *lqe,
359 struct dt_object *slv_obj, __u64 *granted)
361 struct qmt_thread_info *qti = qmt_info(env);
/* scratch slave record embedded in the qmt_thread_info obtained from env */
362 struct lquota_slv_rec *slv_rec = &qti->qti_slv_rec;
366 LASSERT(lqe != NULL);
367 LASSERT(lqe_is_master(lqe));
368 LASSERT(lqe_is_locked(lqe));
370 LQUOTA_DEBUG(lqe, "read slv "DFID,
371 PFID(lu_object_fid(&slv_obj->do_lu)));
373 /* read slave record from disk */
374 rc = lquota_disk_read(env, slv_obj, &lqe->lqe_id,
375 (struct dt_rec *)slv_rec);
/* NOTE(review): the rc checks selecting between the statements below are on
 * lines not shown in this listing */
381 /* extract granted from on-disk record */
382 *granted = slv_rec->qsr_granted;
385 LQUOTA_ERROR(lqe, "failed to read slave record "DFID,
386 PFID(lu_object_fid(&slv_obj->do_lu)));
390 LQUOTA_DEBUG(lqe, "successful slv read %llu", *granted);
396 * Update record in slave index file.
397 * The entry must be at least read locked.
399 * \param env - the environment passed by the caller
400 * \param th - is the transaction handle to be used for the disk writes
401 * \param lqe - is the dirty quota entry which will be updated at the same time
403 * \param slv_obj - is the dt_object associated with the slave index
404 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
405 * \param ver - is used to return the new version of the index.
406 * \param granted - is the new amount of quota space owned by the slave
408 * \retval - 0 on success, appropriate error on failure
410 int qmt_slv_write(const struct lu_env *env, struct thandle *th,
411 struct lquota_entry *lqe, struct dt_object *slv_obj,
412 __u32 flags, __u64 *ver, __u64 granted)
414 struct qmt_thread_info *qti = qmt_info(env);
415 struct lquota_slv_rec *rec;
419 LASSERT(lqe != NULL);
420 LASSERT(lqe_is_master(lqe));
421 LASSERT(lqe_is_locked(lqe));
423 LQUOTA_DEBUG(lqe, "write slv "DFID" granted:%llu",
424 PFID(lu_object_fid(&slv_obj->do_lu)), granted);
426 /* never delete the entry, otherwise, it'll not be transferred
427 * to slave during reintegration. */
/* the record is built in the scratch slave record of the qmt_thread_info
 * obtained from env */
428 rec = &qti->qti_slv_rec;
430 /* updated space granted to this slave */
431 rec->qsr_granted = granted;
433 /* write new granted space */
434 rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
435 (struct dt_rec *)rec, flags, ver);
/* reported on failure; the rc test guarding this message is not visible */
437 LQUOTA_ERROR(lqe, "failed to update slave index "DFID" granted:"
438 "%llu", PFID(lu_object_fid(&slv_obj->do_lu)),
447 * Check whether new limits are valid for this pool
449 * \param lqe - is the quota entry subject to the setquota
450 * \param hard - is the new hard limit
451 * \param soft - is the new soft limit
453 int qmt_validate_limits(struct lquota_entry *lqe, __u64 hard, __u64 soft)
/* a hard limit of 0 is never rejected by this test; lqe is not referenced by
 * the visible statements */
457 if (hard != 0 && soft > hard)
458 /* when a hard limit is set, the soft limit must not exceed it */
464 * Set/clear edquot flag after quota space allocation/release or settings
465 * change. Slaves will be notified of changes via glimpse on per-ID lock
467 * \param lqe - is the quota entry to check
468 * \param now - is the current time in second used for grace time management
470 void qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now)
472 struct qmt_pool_info *pool = lqe2qpi(lqe);
/* entries that are not enforced, and id 0, are filtered out first (the
 * statement guarded by this test is not visible here) */
475 if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
478 if (!lqe->lqe_edquot) {
479 /* space exhausted flag not set, let's check whether it is time
482 if (!qmt_space_exhausted(lqe, now))
483 /* the qmt still has available space */
486 /* See comment in qmt_adjust_qunit(). LU-4139 */
487 if (qmt_hard_exhausted(lqe) ||
488 pool->qpi_key >> 16 != LQUOTA_RES_DT) {
491 /* we haven't reached the minimal qunit yet so there is
492 * still hope that the rebalancing process might free
493 * up some quota space */
494 if (lqe->lqe_qunit != pool->qpi_least_qunit)
497 /* least qunit value not sent to all slaves yet */
498 if (lqe->lqe_revoke_time == 0)
501 /* Let's give more time to slave to release space */
/* lapse is the current time minus QMT_REBA_TIMEOUT */
502 lapse = ktime_get_seconds() - QMT_REBA_TIMEOUT;
503 if (lqe->lqe_may_rel != 0 && lqe->lqe_revoke_time > lapse)
/* NOTE(review): presumably the else branch of the hard-exhausted / non-DT
 * test above (brace lines are not visible): qunit is compared with the soft
 * least qunit instead */
506 if (lqe->lqe_qunit > pool->qpi_soft_least_qunit)
510 /* set edquot flag */
511 lqe->lqe_edquot = true;
513 /* space exhausted flag set, let's check whether it is time to
516 if (qmt_space_exhausted(lqe, now))
517 /* the qmt still has no space */
520 if (lqe->lqe_hardlimit != 0 &&
521 lqe->lqe_granted + pool->qpi_least_qunit >
523 /* we clear the flag only once at least one least qunit
527 /* clear edquot flag */
528 lqe->lqe_edquot = false;
531 LQUOTA_DEBUG(lqe, "changing edquot flag");
533 /* let's notify slave by issuing glimpse on per-ID lock.
534 * the rebalance thread will take care of this */
535 qmt_id_lock_notify(pool->qpi_qmt, lqe);
539 /* Using least_qunit when over block softlimit will seriously impact the
540 * write performance, we need to do some special tweaking on that.
 *
 * \param lqe      - is the quota entry; its soft limit must be non-zero
 * \param oversoft - output flag. NOTE(review): the assignments to it are on
 *                   lines not shown in this listing
 *
 * \retval the soft limit for pools whose resource type is not LQUOTA_RES_DT,
 *         or while the granted space does not exceed
 *         softlimit + qpi_soft_least_qunit; the hard limit when granted is
 *         beyond that and a hard limit is set (the remaining case is not
 *         visible here)
 */
541 static __u64 qmt_calc_softlimit(struct lquota_entry *lqe, bool *oversoft)
543 struct qmt_pool_info *pool = lqe2qpi(lqe);
545 LASSERT(lqe->lqe_softlimit != 0);
547 /* No need to do special tweaking for inode limit */
548 if (pool->qpi_key >> 16 != LQUOTA_RES_DT)
549 return lqe->lqe_softlimit;
551 if (lqe->lqe_granted <= lqe->lqe_softlimit +
552 pool->qpi_soft_least_qunit) {
553 return lqe->lqe_softlimit;
554 } else if (lqe->lqe_hardlimit != 0) {
556 return lqe->lqe_hardlimit;
564 * Try to grant more quota space back to slave.
566 * \param lqe - is the quota entry for which we would like to allocate more
568 * \param granted - is how much was already granted as part of the request
570 * \param spare - is how much unused quota space the slave already owns
572 * \retval return how much additional space can be granted to the slave
574 __u64 qmt_alloc_expand(struct lquota_entry *lqe, __u64 granted, __u64 spare)
576 struct qmt_pool_info *pool = lqe2qpi(lqe);
577 __u64 remaining, qunit;
/* only meaningful for enforced entries whose qunit has been computed */
580 LASSERT(lqe->lqe_enforced && lqe->lqe_qunit != 0);
/* number of slaves registered for this quota type */
582 slv_cnt = lqe2qpi(lqe)->qpi_slv_nr[lqe->lqe_site->lqs_qtype];
583 qunit = lqe->lqe_qunit;
585 /* See comment in qmt_adjust_qunit(). LU-4139. */
586 if (lqe->lqe_softlimit != 0) {
588 remaining = qmt_calc_softlimit(lqe, &oversoft);
/* NOTE(review): presumably guarded by a test on the computed limit that is
 * not visible here */
590 remaining = lqe->lqe_granted +
591 pool->qpi_soft_least_qunit;
593 remaining = lqe->lqe_hardlimit;
/* up to here remaining holds the applicable limit; nothing is left once the
 * granted space has reached it */
596 if (lqe->lqe_granted >= remaining)
/* from here on remaining is the space still available under the limit */
599 remaining -= lqe->lqe_granted;
/* keep only the part of granted below qunit (assumes qunit is a power of
 * two - TODO confirm) */
605 granted &= (qunit - 1);
607 if (remaining > (slv_cnt * qunit) >> 1) {
608 /* enough room to grant more space w/o additional
609 * shrinking ... at least for now */
610 remaining -= (slv_cnt * qunit) >> 1;
611 } else if (qunit != pool->qpi_least_qunit) {
616 granted &= (qunit - 1);
618 RETURN(min_t(__u64, qunit - spare, remaining));
620 RETURN(min_t(__u64, qunit - granted, remaining));
621 } while (qunit >= pool->qpi_least_qunit);
627 * Adjust qunit size according to quota limits and total granted count.
628 * The caller must have locked the lqe.
630 * \param env - the environment passed by the caller
631 * \param lqe - is the qid entry to be adjusted
633 void qmt_adjust_qunit(const struct lu_env *env, struct lquota_entry *lqe)
635 struct qmt_pool_info *pool = lqe2qpi(lqe);
637 __u64 qunit, limit, qunit2 = 0;
640 LASSERT(lqe_is_locked(lqe));
642 if (!lqe->lqe_enforced || lqe->lqe_id.qid_uid == 0)
643 /* no quota limits */
646 /* record how many slaves have already registered */
647 slv_cnt = pool->qpi_slv_nr[lqe->lqe_site->lqs_qtype];
649 /* wait for at least one slave to join */
652 /* Qunit calculation is based on soft limit, if any, hard limit
653 * otherwise. This means that qunit is shrunk to the minimum when
654 * beyond the soft limit. This will impact performance, but that's the
655 * price of an accurate grace time management. */
656 if (lqe->lqe_softlimit != 0) {
658 /* As a compromise of write performance and the grace time
659 * accuracy, the block qunit size will be shrunk to
660 * qpi_soft_least_qunit when over softlimit. LU-4139. */
661 limit = qmt_calc_softlimit(lqe, &oversoft);
663 qunit2 = pool->qpi_soft_least_qunit;
665 GOTO(done, qunit = qunit2);
666 } else if (lqe->lqe_hardlimit != 0) {
667 limit = lqe->lqe_hardlimit;
669 LQUOTA_ERROR(lqe, "enforced bit set, but neither hard nor soft "
/* start from the current qunit, or from the pool's least qunit when none has
 * been set yet */
674 qunit = lqe->lqe_qunit == 0 ? pool->qpi_least_qunit : lqe->lqe_qunit;
676 /* The qunit value is computed as follows: limit / (2 * slv_cnt).
677 * Then 75% of the quota space can be granted with current qunit value.
678 * The remaining 25% are then used with reduced qunit size (by a factor
679 * of 4) which is then divided in a similar manner.
681 * |---------------------limit---------------------|
682 * |-------limit / 2-------|-limit / 4-|-limit / 4-|
683 * |qunit|qunit|qunit|qunit| | |
684 * |----slv_cnt * qunit----| | |
685 * |-grow limit-| | | |
686 * |--------------shrink limit---------| |
687 * |---space granted in qunit chunks---|-remaining-|
693 * qunit >>= 2; |qunit*slv_cnt|qunit*slv_cnt|
694 * |---space in qunit---|remain|
696 if (qunit == pool->qpi_least_qunit ||
697 limit >= lqe->lqe_granted + ((slv_cnt * qunit) >> 1)) {
698 /* current qunit value still fits, let's see if we can afford to
699 * increase qunit now ...
700 * To increase qunit again, we have to be under 25% */
701 while (qunit && limit >= lqe->lqe_granted + 6 * qunit * slv_cnt)
706 do_div(qunit, 2 * slv_cnt);
710 /* shrink qunit until we find a suitable value */
711 while (qunit > pool->qpi_least_qunit &&
712 limit < lqe->lqe_granted + ((slv_cnt * qunit) >> 1))
/* qunit2 is only set on the soft-limit path; NOTE(review): presumably it
 * caps the computed qunit (the guarded statement is not visible here) */
716 if (qunit2 && qunit > qunit2)
719 if (lqe->lqe_qunit == qunit)
720 /* keep current qunit */
723 LQUOTA_DEBUG(lqe, "%s qunit to %llu",
724 lqe->lqe_qunit < qunit ? "increasing" : "decreasing",
727 /* store new qunit value; after the swap, qunit holds the previous one */
728 swap(lqe->lqe_qunit, qunit);
730 /* reset revoke time */
731 lqe->lqe_revoke_time = 0;
/* qunit is now the old value, so new < old means the qunit was shrunk */
733 if (lqe->lqe_qunit < qunit)
734 /* let's notify slave of qunit shrinking */
735 qmt_id_lock_notify(pool->qpi_qmt, lqe);
736 else if (lqe->lqe_qunit == pool->qpi_least_qunit)
737 /* initial qunit value is the smallest one */
738 lqe->lqe_revoke_time = ktime_get_seconds();
743 * Adjust qunit & edquot flag in case it wasn't initialized already (e.g.
744 * limit set while no slaves were connected yet)
746 void qmt_revalidate(const struct lu_env *env, struct lquota_entry *lqe)
/* a qunit of 0 is taken as "not initialized yet" */
748 if (lqe->lqe_qunit == 0) {
749 /* lqe was read from disk, but neither qunit, nor edquot flag
750 * were initialized */
751 qmt_adjust_qunit(env, lqe);
/* the edquot flag is only evaluated once a qunit could be computed */
752 if (lqe->lqe_qunit != 0)
753 qmt_adjust_edquot(lqe, ktime_get_real_seconds());