4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012 Intel, Inc.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
32 # define EXPORT_SYMTAB
35 #define DEBUG_SUBSYSTEM S_LQUOTA
37 #include "qmt_internal.h"
40 * Initialize qmt-specific fields of quota entry.
42 * \param lqe - is the quota entry to initialize
43 * \param arg - is the pointer to the qmt_pool_info structure
/* Installed as the .lqe_init operation of qmt_lqe_ops. Asserts that the entry
 * passes lqe_is_master(), then zeroes the revoke time and initializes the
 * entry semaphore. The arg pointer is not referenced by the visible lines. */
45 static void qmt_lqe_init(struct lquota_entry *lqe, void *arg)
47 LASSERT(lqe_is_master(lqe));
/* start with a zero revoke time */
49 lqe->lqe_revoke_time = 0;
50 cfs_init_rwsem(&lqe->lqe_sem);
54 * Update a lquota entry. This is done by reading quota settings from the global
55 * index. The lquota entry must be write locked.
57 * \param env - the environment passed by the caller
58 * \param lqe - is the quota entry to refresh
59 * \param arg - is the pointer to the qmt_pool_info structure
/* Installed as the .lqe_read operation of qmt_lqe_ops. Reads the record for
 * lqe->lqe_id from the global index object selected by the quota type of the
 * entry site, using the per-thread qti_glb_rec buffer as storage, and copies
 * granted/hard limit/soft limit/grace time into the entry.
 * NOTE(review): the rest of the signature, the rc declaration and the
 * branch/return statements that pick between the paths below are not visible
 * in this excerpt. */
61 static int qmt_lqe_read(const struct lu_env *env, struct lquota_entry *lqe,
/* per-thread scratch area; the on-disk record is read into qti->qti_glb_rec */
64 struct qmt_thread_info *qti = qmt_info(env);
65 struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;
69 LASSERT(lqe_is_master(lqe));
71 /* read record from disk */
/* the index object is picked from qpi_glb_obj[] by the site quota type */
72 rc = lquota_disk_read(env, pool->qpi_glb_obj[lqe->lqe_site->lqs_qtype],
73 &lqe->lqe_id, (struct dt_rec *)&qti->qti_glb_rec);
77 /* no such entry, assume quota isn't enforced for this user */
78 lqe->lqe_enforced = false;
81 /* copy quota settings from on-disk record */
82 lqe->lqe_granted = qti->qti_glb_rec.qbr_granted;
83 lqe->lqe_hardlimit = qti->qti_glb_rec.qbr_hardlimit;
84 lqe->lqe_softlimit = qti->qti_glb_rec.qbr_softlimit;
85 lqe->lqe_gracetime = qti->qti_glb_rec.qbr_time;
/* enforcement is derived from the limits just copied: off only when both
 * limits are zero */
87 if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0)
88 /* {hard,soft}limit=0 means no quota enforced */
89 lqe->lqe_enforced = false;
91 lqe->lqe_enforced = true;
95 LQUOTA_ERROR(lqe, "failed to read quota entry from disk, rc:%d",
100 LQUOTA_DEBUG(lqe, "read");
105 * Print lqe information for debugging.
107 * \param lqe - is the quota entry to debug
108 * \param arg - is the pointer to the qmt_pool_info structure
109 * \param msgdata - debug message
110 * \param fmt - format of debug message
/* Installed as the .lqe_debug operation of qmt_lqe_ops. Forwards the caller
 * format and va_list to libcfs_debug_vmsg2() together with a fixed suffix
 * that dumps the state of the quota entry. */
112 static void qmt_lqe_debug(struct lquota_entry *lqe, void *arg,
113 struct libcfs_debug_msg_data *msgdata,
114 const char *fmt, va_list args)
116 struct qmt_pool_info *pool = (struct qmt_pool_info *)arg;
/* qpi_key is split for display: the low 16 bits are printed as the pool
 * number and the bits above them are passed to RES_NAME() for the %s field */
118 libcfs_debug_vmsg2(msgdata, fmt, args,
119 "qmt:%s pool:%d-%s id:"LPU64" enforced:%d hard:"LPU64
120 " soft:"LPU64" granted:"LPU64" time:"LPU64" qunit:"
121 LPU64" edquot:%d may_rel:"LPU64" revoke:"LPU64"\n",
122 pool->qpi_qmt->qmt_svname,
123 pool->qpi_key & 0x0000ffff,
124 RES_NAME(pool->qpi_key >> 16),
125 lqe->lqe_id.qid_uid, lqe->lqe_enforced,
126 lqe->lqe_hardlimit, lqe->lqe_softlimit,
127 lqe->lqe_granted, lqe->lqe_gracetime,
128 lqe->lqe_qunit, lqe->lqe_edquot, lqe->lqe_may_rel,
129 lqe->lqe_revoke_time);
133 * Vector of quota entry operations supported on the master
/* Operation table wiring the three static callbacks defined in this file
 * (init, read, debug) into a struct lquota_entry_operations. */
135 struct lquota_entry_operations qmt_lqe_ops = {
136 .lqe_init = qmt_lqe_init,
137 .lqe_read = qmt_lqe_read,
138 .lqe_debug = qmt_lqe_debug,
142 * Reserve enough credits to update records in both the global index and
143 * the slave index identified by \slv_obj
145 * \param env - is the environment passed by the caller
146 * \param lqe - is the quota entry associated with the identifier
147 * subject to the change
148 * \param slv_obj - is the dt_object associated with the index file
149 * \param restore - is a temporary storage for current quota settings which will
150 * be restored if something goes wrong at index update time.
/* Creates a transaction on qmt->qmt_child, declares a write of lqe->lqe_id
 * to the global index (LQE_GLB_OBJ(lqe)) and, when slv_obj is not NULL, to
 * slv_obj as well, then starts it with dt_trans_start_local().
 * The visible lines also stop the transaction and log an error, and copy the
 * current hard limit, soft limit, grace time, granted and qunit values of
 * the entry into restore. The conditions selecting between those two paths,
 * the local declarations of th and rc, and the return statement are not
 * visible in this excerpt.
 * NOTE(review): slv_obj may be NULL (qmt_trans_start passes NULL), yet both
 * the debug message and the error message dereference slv_obj->do_lu.
 * Confirm that each is guarded or cannot be reached with a NULL slv_obj. */
152 struct thandle *qmt_trans_start_with_slv(const struct lu_env *env,
153 struct lquota_entry *lqe,
154 struct dt_object *slv_obj,
155 struct qmt_lqe_restore *restore)
157 struct qmt_device *qmt;
162 LASSERT(lqe != NULL);
163 LASSERT(lqe_is_master(lqe));
/* the qmt device is reached through the pool owning this entry */
165 qmt = lqe2qpi(lqe)->qpi_qmt;
168 LQUOTA_DEBUG(lqe, "declare write for slv "DFID,
169 PFID(lu_object_fid(&slv_obj->do_lu)));
171 /* create the transaction handle */
172 th = dt_trans_create(env, qmt->qmt_child);
177 /* quota settings on master are updated synchronously for the
181 /* reserve credits for global index update */
182 rc = lquota_disk_declare_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id);
186 if (slv_obj != NULL) {
187 /* reserve credits for slave index update */
188 rc = lquota_disk_declare_write(env, th, slv_obj, &lqe->lqe_id);
193 /* start transaction */
194 rc = dt_trans_start_local(env, qmt->qmt_child, th);
/* cleanup: the transaction handle is stopped and the failure logged */
201 dt_trans_stop(env, qmt->qmt_child, th);
203 LQUOTA_ERROR(lqe, "failed to slv declare write for "DFID
204 ", rc:%d", PFID(lu_object_fid(&slv_obj->do_lu)),
/* snapshot of the current settings, so that the caller can restore them if
 * the index update fails later on */
207 restore->qlr_hardlimit = lqe->lqe_hardlimit;
208 restore->qlr_softlimit = lqe->lqe_softlimit;
209 restore->qlr_gracetime = lqe->lqe_gracetime;
210 restore->qlr_granted = lqe->lqe_granted;
211 restore->qlr_qunit = lqe->lqe_qunit;
217 * Reserve enough credits to update a record in the global index
219 * \param env - is the environment passed by the caller
220 * \param lqe - is the quota entry to be modified in the global index
221 * \param restore - is a temporary storage for current quota settings which will
222 * be restored if something goes wrong at index update time.
/* Convenience wrapper: logs a debug message and delegates to
 * qmt_trans_start_with_slv() with a NULL slave object, so that only the
 * global index write is declared. Returns whatever the delegate returns. */
224 struct thandle *qmt_trans_start(const struct lu_env *env,
225 struct lquota_entry *lqe,
226 struct qmt_lqe_restore *restore)
228 LQUOTA_DEBUG(lqe, "declare write");
229 return qmt_trans_start_with_slv(env, lqe, NULL, restore);
233 * Update record associated with a quota entry in the global index.
234 * If LQUOTA_BUMP_VER is set, then the global index version must also be
236 * The entry must be at least read locked, dirty and up-to-date.
238 * \param env - the environment passed by the caller
239 * \param th - is the transaction handle to be used for the disk writes
240 * \param lqe - is the quota entry to update
241 * \param obj - is the dt_object associated with the index file
242 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
243 * \param ver - is used to return the new version of the index.
245 * \retval - 0 on success and lqe dirty flag cleared,
246 * appropriate error on failure and uptodate flag cleared.
/* Copies granted, hard limit, soft limit and grace time from the entry into
 * the per-thread lquota_glb_rec and writes that record for lqe->lqe_id to
 * the global index LQE_GLB_OBJ(lqe) through lquota_disk_write(), passing
 * flags and ver through unchanged.
 * On entry it asserts that lqe is non-NULL, a master entry, locked and
 * up-to-date, and that flags holds no bit other than LQUOTA_BUMP_VER and
 * LQUOTA_SET_VER.
 * NOTE(review): the header above documents an obj parameter that this
 * signature does not take. The statement executed for the removal case
 * (enforcement off, nothing granted, non-zero id), the rc declaration and
 * the return statement are not visible in this excerpt. */
248 int qmt_glb_write(const struct lu_env *env, struct thandle *th,
249 struct lquota_entry *lqe, __u32 flags, __u64 *ver)
251 struct qmt_thread_info *qti = qmt_info(env);
252 struct lquota_glb_rec *rec;
256 LASSERT(lqe != NULL);
257 LASSERT(lqe_is_master(lqe));
258 LASSERT(lqe_is_locked(lqe));
259 LASSERT(lqe->lqe_uptodate);
/* reject any flag bit other than the two version-handling flags */
260 LASSERT((flags & ~(LQUOTA_BUMP_VER | LQUOTA_SET_VER)) == 0);
262 LQUOTA_DEBUG(lqe, "write glb");
264 if (!lqe->lqe_enforced && lqe->lqe_granted == 0 &&
265 lqe->lqe_id.qid_uid != 0) {
266 /* quota isn't enforced any more for this entry and there is no
267 * more space granted to slaves, let's just remove the entry
271 rec = &qti->qti_glb_rec;
273 /* fill global index with updated quota settings */
274 rec->qbr_granted = lqe->lqe_granted;
275 rec->qbr_hardlimit = lqe->lqe_hardlimit;
276 rec->qbr_softlimit = lqe->lqe_softlimit;
277 rec->qbr_time = lqe->lqe_gracetime;
280 /* write new quota settings */
281 rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
282 (struct dt_rec *)rec, flags, ver);
284 /* we failed to write the new quota settings to disk, report
285 * error to caller who will restore the initial value */
286 LQUOTA_ERROR(lqe, "failed to update global index, rc:%d", rc);
292 * Read from disk how much quota space is allocated to a slave.
293 * This is done by reading records from the dedicated slave index file.
294 * Return in \granted how much quota space is currently allocated to the
296 * The entry must be at least read locked.
298 * \param env - the environment passed by the caller
299 * \param lqe - is the quota entry associated with the identifier to look-up
301 * \param slv_obj - is the dt_object associated with the slave index
302 * \param granted - is the output parameter where to return how much space
303 * is granted to the slave.
305 * \retval - 0 on success, appropriate error on failure
/* Reads the record for lqe->lqe_id from the slave index slv_obj into the
 * per-thread qti_slv_rec buffer and stores its qsr_granted field in *granted.
 * Asserts on entry that lqe is non-NULL, a master entry and locked.
 * NOTE(review): the rc declaration, the checks on rc that choose between the
 * success and error paths, and the return statement are not visible in this
 * excerpt. The error message does not print the return code. */
307 int qmt_slv_read(const struct lu_env *env, struct lquota_entry *lqe,
308 struct dt_object *slv_obj, __u64 *granted)
310 struct qmt_thread_info *qti = qmt_info(env);
/* per-thread buffer receiving the on-disk slave record */
311 struct lquota_slv_rec *slv_rec = &qti->qti_slv_rec;
315 LASSERT(lqe != NULL);
316 LASSERT(lqe_is_master(lqe));
317 LASSERT(lqe_is_locked(lqe));
319 LQUOTA_DEBUG(lqe, "read slv "DFID,
320 PFID(lu_object_fid(&slv_obj->do_lu)));
322 /* read slave record from disk */
323 rc = lquota_disk_read(env, slv_obj, &lqe->lqe_id,
324 (struct dt_rec *)slv_rec);
330 /* extract granted from on-disk record */
331 *granted = slv_rec->qsr_granted;
334 LQUOTA_ERROR(lqe, "failed to read slave record "DFID,
335 PFID(lu_object_fid(&slv_obj->do_lu)));
339 LQUOTA_DEBUG(lqe, "successful slv read "LPU64, *granted);
345 * Update record in slave index file.
346 * The entry must be at least read locked.
348 * \param env - the environment passed by the caller
349 * \param th - is the transaction handle to be used for the disk writes
350 * \param lqe - is the dirty quota entry which will be updated at the same time
352 * \param slv_obj - is the dt_object associated with the slave index
353 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
354 * \param ver - is used to return the new version of the index.
355 * \param granted - is the new amount of quota space owned by the slave
357 * \retval - 0 on success, appropriate error on failure
/* Stores granted in the per-thread lquota_slv_rec and writes that record for
 * lqe->lqe_id to the slave index slv_obj through lquota_disk_write(), passing
 * flags and ver through unchanged.
 * Asserts on entry that lqe is non-NULL, a master entry and locked.
 * NOTE(review): the condition and statement for the removal case described
 * by the comment below, the rc declaration and the return statement are not
 * visible in this excerpt. */
359 int qmt_slv_write(const struct lu_env *env, struct thandle *th,
360 struct lquota_entry *lqe, struct dt_object *slv_obj,
361 __u32 flags, __u64 *ver, __u64 granted)
363 struct qmt_thread_info *qti = qmt_info(env);
364 struct lquota_slv_rec *rec;
368 LASSERT(lqe != NULL);
369 LASSERT(lqe_is_master(lqe));
370 LASSERT(lqe_is_locked(lqe));
372 LQUOTA_DEBUG(lqe, "write slv "DFID" granted:"LPU64,
373 PFID(lu_object_fid(&slv_obj->do_lu)), granted);
376 /* this slave does not own any quota space for this ID any more,
377 * so let's just remove the entry from the index */
/* otherwise the record is built in the per-thread buffer */
380 rec = &qti->qti_slv_rec;
382 /* updated space granted to this slave */
383 rec->qsr_granted = granted;
386 /* write new granted space */
387 rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
388 (struct dt_rec *)rec, flags, ver);
390 LQUOTA_ERROR(lqe, "failed to update slave index "DFID" granted:"
391 LPU64, PFID(lu_object_fid(&slv_obj->do_lu)),
400 * Check whether new limits are valid for this pool
402 * \param lqe - is the quota entry subject to the setquota
403 * \param hard - is the new hard limit
404 * \param soft - is the new soft limit
/* Sanity-checks a hard/soft limit pair. The only visible check flags the
 * pair when a hard limit is set (non-zero) and the soft limit is strictly
 * greater than it; equal limits pass this check. The lqe parameter is not
 * referenced by the visible lines.
 * NOTE(review): the return statements are not visible in this excerpt. */
406 int qmt_validate_limits(struct lquota_entry *lqe, __u64 hard, __u64 soft)
410 if (hard != 0 && soft > hard)
411 /* soft limit must not exceed the hard limit */
417 * Set/clear edquot flag after quota space allocation/release or settings
418 * change. Slaves will be notified of changes via glimpse on per-ID lock
420 * \param lqe - is the quota entry to check
421 * \param now - is the current time in seconds, used for grace time management
/* Re-evaluates the lqe_edquot flag of an entry and, on the visible path that
 * reaches the end of the function, logs the change and calls
 * qmt_id_lock_notify() for the entry.
 * When the flag is clear, the visible checks test in turn: whether
 * qmt_space_exhausted() reports exhaustion, whether qunit is at the pool
 * minimum (qpi_least_qunit), whether a revoke time has been recorded, and
 * whether slaves should be given more time to release space; after them the
 * flag is set.
 * When the flag is set, the visible checks test qmt_space_exhausted() again
 * and compare granted + qpi_least_qunit against a bound; after them the flag
 * is cleared.
 * NOTE(review): the statements executed when each check matches (presumably
 * early returns), the else keyword and the right-hand side of the last
 * comparison are not visible in this excerpt. */
423 void qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now)
425 struct qmt_pool_info *pool = lqe2qpi(lqe);
428 if (!lqe->lqe_enforced)
431 if (!lqe->lqe_edquot) {
432 /* space exhausted flag not set, let's check whether it is time
435 if (!qmt_space_exhausted(lqe, now))
436 /* the qmt still has available space */
439 if (lqe->lqe_qunit != pool->qpi_least_qunit)
440 /* we haven't reached the minimal qunit yet, so there is
441 * still hope that the rebalancing process might free up
442 * some quota space */
445 if (lqe->lqe_revoke_time == 0)
446 /* least qunit value not sent to all slaves yet */
/* presumably true while less than QMT_REBA_TIMEOUT has elapsed since
 * lqe_revoke_time -- TODO confirm against the cfs_time helpers */
449 if (lqe->lqe_may_rel != 0 &&
450 cfs_time_before_64(cfs_time_shift_64(-QMT_REBA_TIMEOUT),
451 lqe->lqe_revoke_time))
452 /* Let's give more time to slave to release space */
455 /* set edquot flag */
456 lqe->lqe_edquot = true;
458 /* space exhausted flag set, let's check whether it is time to
461 if (qmt_space_exhausted(lqe, now))
462 /* the qmt still has no space */
/* this check only applies when a hard limit is set */
465 if (lqe->lqe_hardlimit != 0 &&
466 lqe->lqe_granted + pool->qpi_least_qunit >
468 /* we clear the flag only once at least one least qunit
472 /* clear edquot flag */
473 lqe->lqe_edquot = false;
476 LQUOTA_DEBUG(lqe, "changing edquot flag");
478 /* let's notify slave by issuing glimpse on per-ID lock.
479 * the rebalance thread will take care of this */
480 qmt_id_lock_notify(pool->qpi_qmt, lqe);
485 * Try to grant more quota space back to slave.
487 * \param lqe - is the quota entry for which we would like to allocate more
489 * \param granted - is how much was already granted as part of the request
491 * \param spare - is how much unused quota space the slave already owns
493 * \retval return how much additional space can be granted to the slave
/* Computes how much extra quota space may be handed to a slave on top of a
 * request. The reference limit is the soft limit when set, the hard limit
 * otherwise; the space already granted for the entry is subtracted from it
 * to obtain the remaining space. A do/while loop then runs for as long as
 * the local qunit is at least the pool minimum (qpi_least_qunit), and the
 * visible return values are the smaller of the remaining space and either
 * (qunit - spare) or (qunit - granted).
 * Asserts on entry that the entry is enforced and has a non-zero qunit.
 * NOTE(review): the slv_cnt declaration, the do keyword, the statements that
 * change qunit inside the loop, the conditions choosing between the two
 * RETURN forms and the final return are not visible in this excerpt. */
495 __u64 qmt_alloc_expand(struct lquota_entry *lqe, __u64 granted, __u64 spare)
497 struct qmt_pool_info *pool = lqe2qpi(lqe);
498 __u64 remaining, qunit;
501 LASSERT(lqe->lqe_enforced && lqe->lqe_qunit != 0);
/* number of slaves registered for the quota type of this entry */
503 slv_cnt = lqe2qpi(lqe)->qpi_slv_nr[lqe->lqe_site->lqs_qtype];
504 qunit = lqe->lqe_qunit;
506 if (lqe->lqe_softlimit != 0)
507 remaining = lqe->lqe_softlimit;
509 remaining = lqe->lqe_hardlimit;
/* the reference limit is already fully granted */
511 if (lqe->lqe_granted >= remaining)
514 remaining -= lqe->lqe_granted;
/* keeps the low bits of granted; this equals granted modulo qunit only if
 * qunit is a power of two -- TODO confirm */
520 granted &= (qunit - 1);
522 if (remaining > (slv_cnt * qunit) >> 1) {
523 /* enough room to grant more space w/o additional
524 * shrinking ... at least for now */
525 remaining -= (slv_cnt * qunit) >> 1;
526 } else if (qunit != pool->qpi_least_qunit) {
531 granted &= (qunit - 1);
533 RETURN(min_t(__u64, qunit - spare, remaining));
535 RETURN(min_t(__u64, qunit - granted, remaining));
536 } while (qunit >= pool->qpi_least_qunit);
542 * Adjust qunit size according to quota limits and total granted count.
543 * The caller must have locked the lqe.
545 * \param env - the environment passed by the caller
546 * \param lqe - is the qid entry to be adjusted
/* Recomputes the qunit of an enforced entry from its limit (soft limit when
 * set, hard limit otherwise), the space already granted and the number of
 * slaves registered for the quota type of the entry. When the value changes
 * it is stored in the entry, the revoke time is reset, and either the slaves
 * are notified (qunit shrank) or the revoke time is stamped with the current
 * time (new qunit equals the pool minimum).
 * Asserts on entry that the entry is locked. The env parameter is not
 * referenced by the visible lines.
 * NOTE(review): the local declarations (slv_cnt, limit, qunit), the early
 * return statements, the slv_cnt check announced by the "wait for at least
 * one slave" comment and the bodies of both while loops are not visible in
 * this excerpt. */
548 void qmt_adjust_qunit(const struct lu_env *env, struct lquota_entry *lqe)
550 struct qmt_pool_info *pool = lqe2qpi(lqe);
555 LASSERT(lqe_is_locked(lqe));
557 if (!lqe->lqe_enforced)
558 /* no quota limits */
561 /* record how many slaves have already registered */
562 slv_cnt = pool->qpi_slv_nr[lqe->lqe_site->lqs_qtype];
564 /* wait for at least one slave to join */
567 /* Qunit calculation is based on soft limit, if any, hard limit
568 * otherwise. This means that qunit is shrunk to the minimum when
569 * beyond the soft limit. This will impact performance, but that's the
570 * price of an accurate grace time management. */
571 if (lqe->lqe_softlimit != 0) {
572 limit = lqe->lqe_softlimit;
573 } else if (lqe->lqe_hardlimit != 0) {
574 limit = lqe->lqe_hardlimit;
576 LQUOTA_ERROR(lqe, "enforced bit set, but neither hard nor soft "
/* an entry whose qunit was never set starts from the pool minimum */
581 qunit = lqe->lqe_qunit == 0 ? pool->qpi_least_qunit : lqe->lqe_qunit;
583 /* The qunit value is computed as follows: limit / (2 * slv_cnt).
584 * Then 75% of the quota space can be granted with current qunit value.
585 * The remaining 25% are then used with reduced qunit size (by a factor
586 * of 4) which is then divided in a similar manner.
588 * |---------------------limit---------------------|
589 * |-------limit / 2-------|-limit / 4-|-limit / 4-|
590 * |qunit|qunit|qunit|qunit| | |
591 * |----slv_cnt * qunit----| | |
592 * |-grow limit-| | | |
593 * |--------------shrink limit---------| |
594 * |---space granted in qunit chunks---|-remaining-|
600 * qunit >>= 2; |qunit*slv_cnt|qunit*slv_cnt|
601 * |---space in qunit---|remain|
603 if (qunit == pool->qpi_least_qunit ||
604 limit >= lqe->lqe_granted + ((slv_cnt * qunit) >> 1)) {
605 /* current qunit value still fits, let's see if we can afford to
606 * increase qunit now ...
607 * To increase qunit again, we have to be under 25% */
/* growth test: the limit must cover granted plus 6 * qunit * slv_cnt */
608 while (limit >= lqe->lqe_granted + 6 * qunit * slv_cnt)
611 /* shrink qunit until we find a suitable value */
/* shrinking never goes below the pool minimum */
612 while (qunit > pool->qpi_least_qunit &&
613 limit < lqe->lqe_granted + ((slv_cnt * qunit) >> 1))
617 if (lqe->lqe_qunit == qunit)
618 /* keep current qunit */
621 LQUOTA_DEBUG(lqe, "%s qunit to "LPU64,
622 lqe->lqe_qunit < qunit ? "increasing" : "decreasing",
625 /* store new qunit value */
/* after the swap the entry holds the new value and the local qunit holds
 * the previous one, so the comparison further down detects a shrink */
626 swap(lqe->lqe_qunit, qunit);
628 /* reset revoke time */
629 lqe->lqe_revoke_time = 0;
631 if (lqe->lqe_qunit < qunit)
632 /* let's notify slave of qunit shrinking */
633 qmt_id_lock_notify(pool->qpi_qmt, lqe);
634 else if (lqe->lqe_qunit == pool->qpi_least_qunit)
635 /* initial qunit value is the smallest one */
636 lqe->lqe_revoke_time = cfs_time_current_64();
641 * Adjust qunit & edquot flag in case it wasn't initialized already (e.g.
642 * limit set while no slaves were connected yet)
/* Late initialization of an entry whose qunit is still zero: runs
 * qmt_adjust_qunit() and, only if that produced a non-zero qunit, follows up
 * with qmt_adjust_edquot() using the current time in seconds. Entries with a
 * non-zero qunit are left untouched. */
644 void qmt_revalidate(const struct lu_env *env, struct lquota_entry *lqe)
646 if (lqe->lqe_qunit == 0) {
647 /* lqe was read from disk, but neither qunit, nor edquot flag
648 * were initialized */
649 qmt_adjust_qunit(env, lqe);
/* qunit may still be zero here, in which case edquot is not evaluated */
650 if (lqe->lqe_qunit != 0)
651 qmt_adjust_edquot(lqe, cfs_time_current_sec());