4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
31 #define DEBUG_SUBSYSTEM S_LQUOTA
33 #include <obd_class.h>
34 #include "qmt_internal.h"
37 * Retrieve quota settings for a given identifier.
39 * \param env - is the environment passed by the caller
40 * \param qmt - is the quota master target
41 * \param pool_id - is the 16-bit pool identifier
42 * \param restype - is the pool type, either block (i.e. LQUOTA_RES_DT) or inode
43 * (i.e. LQUOTA_RES_MD)
44 * \param qtype - is the quota type
45 * \param id - is the quota identifier for which we want to access quota settings
47 * \param hard - is the output variable where to copy the hard limit
48 * \param soft - is the output variable where to copy the soft limit
49 * \param time - is the output variable where to copy the grace time
/*
 * qmt_get() - read the hard limit, soft limit and grace time stored in the
 * quota entry that qmt_pool_lqe_lookup() returns for (pool_id, restype,
 * qtype, id) and store them through the hard, soft and time output pointers.
 *
 * NOTE(review): qmt_quotactl() passes NULL for hard and soft when it only
 * wants the grace time, so each store below is presumably guarded by a NULL
 * check on the matching pointer -- confirm, no guard is visible here.
 * NOTE(review): no handling of a failed look-up is visible here either --
 * confirm the result is checked before lqe is dereferenced.
 */
51 static int qmt_get(const struct lu_env *env, struct qmt_device *qmt,
52 __u16 pool_id, __u8 restype, __u8 qtype, union lquota_id *id,
53 __u64 *hard, __u64 *soft, __u64 *time)
55 struct lquota_entry *lqe;
58 /* look-up lqe structure containing quota settings */
59 lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, restype, qtype, id);
63 /* copy quota settings */
65 LQUOTA_DEBUG(lqe, "fetch settings");
/* each output receives the matching field of the quota entry */
67 *hard = lqe->lqe_hardlimit;
69 *soft = lqe->lqe_softlimit;
71 *time = lqe->lqe_gracetime;
79 * Update quota settings for a given identifier.
81 * \param env - is the environment passed by the caller
82 * \param qmt - is the quota master target
83 * \param pool_id - is the 16-bit pool identifier
84 * \param restype - is the pool type, either block (i.e. LQUOTA_RES_DT) or inode
85 * (i.e. LQUOTA_RES_MD)
86 * \param qtype - is the quota type
87 * \param id - is the quota identifier for which we want to modify quota settings
89 * \param hard - is the new hard limit
90 * \param soft - is the new soft limit
91 * \param time - is the new grace time
92 * \param valid - is the list of settings to change
/*
 * qmt_set() - apply a new grace time and/or new limits to the quota entry
 * found for (pool_id, restype, qtype, id) and write it to the global index.
 *
 * Only the settings selected in the valid mask are considered: QIF_TIMES
 * selects the grace time and QIF_LIMITS the hard and soft limits, and a
 * setting is only touched when the new value differs from the stored one.
 * Changing the limits goes through qmt_validate_limits() and also recomputes
 * the grace time and the enforced flag.  The entry is then written with
 * qmt_glb_write(LQUOTA_BUMP_VER).  qmt_restore() is called with
 * qti->qti_restore, the same buffer that was passed to qmt_trans_start()
 * (presumably only when that write fails -- confirm).  Once the transaction
 * has been stopped, qmt_glb_lock_notify() is called with the version
 * produced by qmt_glb_write() when rc is 0 and the entry was dirtied.
 *
 * The PTR_ERR() of the look-up or of the transaction start is propagated
 * when one of them fails.
 * NOTE(review): the guards around those two error paths, the jump labels
 * and the final return statement are not visible here -- confirm.
 */
94 static int qmt_set(const struct lu_env *env, struct qmt_device *qmt,
95 __u16 pool_id, __u8 restype, __u8 qtype,
96 union lquota_id *id, __u64 hard, __u64 soft, __u64 time,
99 struct qmt_thread_info *qti = qmt_info(env);
100 struct lquota_entry *lqe;
101 struct thandle *th = NULL;
103 bool dirtied = false;
107 /* look-up quota entry associated with this ID */
108 lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, restype, qtype, id);
110 RETURN(PTR_ERR(lqe));
112 /* allocate & start transaction with enough credits to update quota
113 * settings in the global index file */
114 th = qmt_trans_start(env, lqe, &qti->qti_restore);
116 GOTO(out_nolock, rc = PTR_ERR(th));
/* one timestamp, reused below for the grace time and the edquot update */
118 now = cfs_time_current_sec();
121 LQUOTA_DEBUG(lqe, "changing quota settings valid:%x hard:"LPU64" soft:"
122 LPU64" time:"LPU64, valid, hard, soft, time);
/* grace time: only rewritten when requested and actually different */
124 if ((valid & QIF_TIMES) != 0 && lqe->lqe_gracetime != time) {
125 /* change time settings */
126 lqe->lqe_gracetime = time;
/* limits: only rewritten when requested and at least one of them differs */
130 if ((valid & QIF_LIMITS) != 0 &&
131 (lqe->lqe_hardlimit != hard || lqe->lqe_softlimit != soft)) {
132 rc = qmt_validate_limits(lqe, hard, soft);
136 /* recompute qunit in case it was never initialized */
137 qmt_revalidate(env, lqe);
139 /* change quota limits */
140 lqe->lqe_hardlimit = hard;
141 lqe->lqe_softlimit = soft;
143 /* clear grace time */
144 if (lqe->lqe_softlimit == 0 ||
145 lqe->lqe_granted <= lqe->lqe_softlimit)
146 /* no soft limit or below soft limit, let's clear grace
148 lqe->lqe_gracetime = 0;
149 else if ((valid & QIF_TIMES) == 0)
150 /* set grace only if user hasn't provided his own */
151 lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
153 /* change enforced status based on new parameters */
154 if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0)
155 lqe->lqe_enforced = false;
157 lqe->lqe_enforced = true;
163 /* write new quota settings to disk */
164 rc = qmt_glb_write(env, th, lqe, LQUOTA_BUMP_VER, &ver);
166 /* restore initial quota settings */
167 qmt_restore(lqe, &qti->qti_restore);
171 /* compute new qunit value now that we have modified the quota
173 qmt_adjust_qunit(env, lqe);
175 /* clear/set edquot flag as needed */
176 qmt_adjust_edquot(lqe, now);
180 lqe_write_unlock(lqe);
/* stop the transaction only if one was actually started */
184 if (th != NULL && !IS_ERR(th))
185 dt_trans_stop(env, qmt->qmt_child, th);
/* call qmt_glb_lock_notify() only when the update succeeded and the entry
 * was actually modified */
187 if (rc == 0 && dirtied)
188 qmt_glb_lock_notify(env, lqe, ver);
194 * Handle quotactl request.
196 * \param env - is the environment passed by the caller
197 * \param ld - is the lu device associated with the qmt
198 * \param oqctl - is the quotactl request
/*
 * qmt_quotactl() - dispatch a quotactl request on oqctl->qc_cmd.
 *
 *   Q_GETINFO   read the inode (LQUOTA_RES_MD) and block (LQUOTA_RES_DT)
 *               grace times into oqctl->qc_dqinfo
 *   Q_SETINFO   set the inode and/or block grace time, as selected by
 *               QIF_ITIME and QIF_BTIME in dqb->dqb_valid
 *   Q_GETQUOTA  fill dqb with the inode and block settings of oqctl->qc_id;
 *               ID 0 gets a zeroed dqb flagged QIF_LIMITS | QIF_TIMES
 *   Q_SETQUOTA  update the inode and/or block settings of oqctl->qc_id, as
 *               selected by QIF_IFLAGS and QIF_BFLAGS in dqb->dqb_valid
 *   Q_QUOTAOFF and LUSTRE_Q_INVALIDATE have their own cases, and an
 *               unsupported command is reported with CERROR
 *
 * Every qmt_get() and qmt_set() call here is made on pool 0, with the ID
 * stored in the per-thread qti->qti_id buffer.
 * NOTE(review): the values returned by the individual cases are not visible
 * here -- confirm against the full function.
 */
200 static int qmt_quotactl(const struct lu_env *env, struct lu_device *ld,
201 struct obd_quotactl *oqctl)
203 struct qmt_thread_info *qti = qmt_info(env);
204 union lquota_id *id = &qti->qti_id;
205 struct qmt_device *qmt = lu2qmt_dev(ld);
206 struct obd_dqblk *dqb = &oqctl->qc_dqblk;
210 LASSERT(qmt != NULL);
212 if (oqctl->qc_type >= MAXQUOTAS)
213 /* invalid quota type */
216 switch (oqctl->qc_cmd) {
218 case Q_GETINFO: /* read grace times */
219 /* Global grace time is stored in quota settings of ID 0. */
222 /* read inode grace time */
223 rc = qmt_get(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type, id,
224 NULL, NULL, &oqctl->qc_dqinfo.dqi_igrace);
228 /* read block grace time */
229 rc = qmt_get(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type, id,
230 NULL, NULL, &oqctl->qc_dqinfo.dqi_bgrace);
233 case Q_SETINFO: /* modify grace times */
234 /* setinfo should be using dqi->dqi_valid, but lfs incorrectly
235 * sets the valid flags in dqb->dqb_valid instead, try to live
238 /* Global grace time is stored in quota settings of ID 0. */
241 if ((dqb->dqb_valid & QIF_ITIME) != 0) {
242 /* set inode grace time */
243 rc = qmt_set(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type,
244 id, 0, 0, oqctl->qc_dqinfo.dqi_igrace,
250 if ((dqb->dqb_valid & QIF_BTIME) != 0)
251 /* set block grace time */
252 rc = qmt_set(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type,
253 id, 0, 0, oqctl->qc_dqinfo.dqi_bgrace,
257 case Q_GETQUOTA: /* consult quota limit */
258 /* There is no quota limit for root user & group */
259 if (oqctl->qc_id == 0) {
260 memset(dqb, 0, sizeof(*dqb));
261 dqb->dqb_valid = QIF_LIMITS | QIF_TIMES;
264 /* extract quota ID from quotactl request */
265 id->qid_uid = oqctl->qc_id;
267 /* look-up inode quota settings */
268 rc = qmt_get(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type, id,
269 &dqb->dqb_ihardlimit, &dqb->dqb_isoftlimit,
/* flag the inode limits and inode grace time as valid in the reply */
274 dqb->dqb_valid |= QIF_ILIMITS | QIF_ITIME;
275 /* master isn't aware of actual inode usage */
276 dqb->dqb_curinodes = 0;
278 /* look-up block quota settings */
279 rc = qmt_get(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type, id,
280 &dqb->dqb_bhardlimit, &dqb->dqb_bsoftlimit,
/* flag the block limits and block grace time as valid in the reply */
285 dqb->dqb_valid |= QIF_BLIMITS | QIF_BTIME;
286 /* master doesn't know the actual block usage */
287 dqb->dqb_curspace = 0;
290 case Q_SETQUOTA: /* change quota limits */
291 if (oqctl->qc_id == 0)
292 /* can't enforce a quota limit for root user & group */
294 /* extract quota ID from quotactl request */
295 id->qid_uid = oqctl->qc_id;
/* inode and block settings are updated independently, each one gated by
 * its own group of flags, and only that group is forwarded as valid mask */
297 if ((dqb->dqb_valid & QIF_IFLAGS) != 0) {
298 /* update inode quota settings */
299 rc = qmt_set(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type,
300 id, dqb->dqb_ihardlimit,
301 dqb->dqb_isoftlimit, dqb->dqb_itime,
302 dqb->dqb_valid & QIF_IFLAGS);
307 if ((dqb->dqb_valid & QIF_BFLAGS) != 0)
308 /* update block quota settings */
309 rc = qmt_set(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type,
310 id, dqb->dqb_bhardlimit,
311 dqb->dqb_bsoftlimit, dqb->dqb_btime,
312 dqb->dqb_valid & QIF_BFLAGS);
316 case Q_QUOTAOFF: /* quota is always turned on on the master */
319 case LUSTRE_Q_INVALIDATE: /* not supported any more */
323 CERROR("%s: unsupported quotactl command: %d\n",
324 qmt->qmt_svname, oqctl->qc_cmd);
332 * Helper function to handle quota request from slave.
334 * \param env - is the environment passed by the caller
335 * \param lqe - is the lquota_entry subject to the quota request
336 * \param qmt - is the master device
337 * \param uuid - is the uuid associated with the slave
338 * \param qb_flags - are the quota request flags as packed in the quota_body
339 * \param qb_count - is the amount of quota space the slave wants to acquire or release
341 * \param qb_usage - is the current space usage on the slave
342 * \param repbody - is the quota_body of reply
344 * \retval 0 : success
345 * \retval -EDQUOT : out of quota
346 * \retval -EINPROGRESS : inform client to retry write/create
347 * \retval -ve : other appropriate errors
/*
 * qmt_dqacq0() - process one release, usage-report, pre-acquire or acquire
 * request from the slave identified by uuid against quota entry lqe, and
 * fill repbody with the result.
 *
 * Outline, as far as it is visible here:
 *  1. zero repbody and copy lqe->lqe_id into it;
 *  2. find the slave index object, pack its FID in the reply and start a
 *     transaction covering the global and slave indexes;
 *  3. bail out with -ESRCH when the entry is not enforced and the request is
 *     not a release, and with 0 when an acquire asks for zero space;
 *  4. read the space already granted to this slave, then adjust it:
 *     release (QMT_REL), usage report (QMT_GRANT of the missing amount),
 *     pre-acquire and acquire (QMT_GRANT, possibly extended by
 *     qmt_alloc_expand());
 *  5. start or clear the grace timer, write the slave index and then the
 *     global index; qmt_restore() is called and repbody->qb_count is zeroed
 *     on what are presumably the write-failure paths;
 *  6. re-evaluate qunit and edquot, unlock the entry, stop the transaction
 *     and call lu_object_put() on the slave object.
 *
 * NOTE(review): several jump labels, conditions and the final return are not
 * visible here, so the branch structure above should be confirmed against
 * the full function.
 */
349 int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
350 struct qmt_device *qmt, struct obd_uuid *uuid, __u32 qb_flags,
351 __u64 qb_count, __u64 qb_usage, struct quota_body *repbody)
353 struct qmt_thread_info *qti = qmt_info(env);
355 struct dt_object *slv_obj = NULL;
356 __u64 slv_granted, slv_granted_bck;
357 struct thandle *th = NULL;
361 LASSERT(uuid != NULL);
363 /* initialize reply */
364 memset(repbody, 0, sizeof(*repbody));
365 memcpy(&repbody->qb_id, &lqe->lqe_id, sizeof(repbody->qb_id));
/* presumably a fault-injection hook: fail early with -cfs_fail_val */
367 if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RECOVERABLE_ERR))
368 RETURN(-cfs_fail_val);
370 /* look-up index file associated with acquiring slave */
371 slv_obj = lquota_disk_slv_find(env, qmt->qmt_child, LQE_ROOT(lqe),
372 lu_object_fid(&LQE_GLB_OBJ(lqe)->do_lu),
375 GOTO(out, rc = PTR_ERR(slv_obj));
377 /* pack slave fid in reply just for sanity check */
378 memcpy(&repbody->qb_slv_fid, lu_object_fid(&slv_obj->do_lu),
379 sizeof(struct lu_fid));
381 /* allocate & start transaction with enough credits to update
382 * global & slave indexes */
383 th = qmt_trans_start_with_slv(env, lqe, slv_obj, &qti->qti_restore);
385 GOTO(out, rc = PTR_ERR(th));
388 LQUOTA_DEBUG(lqe, "dqacq starts uuid:%s flags:0x%x wanted:"LPU64
389 " usage:"LPU64, obd_uuid2str(uuid), qb_flags, qb_count,
392 /* Legal race, limits have been removed on master, but slave didn't
393 * receive the change yet. Return -ESRCH until the slave gets
395 if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
396 GOTO(out_locked, rc = -ESRCH);
398 /* recompute qunit in case it was never initialized */
399 qmt_revalidate(env, lqe);
401 /* slave just wants to acquire per-ID lock */
402 if (req_is_acq(qb_flags) && qb_count == 0)
403 GOTO(out_locked, rc = 0);
405 /* fetch how much quota space is already granted to this slave */
406 rc = qmt_slv_read(env, lqe, slv_obj, &slv_granted);
408 LQUOTA_ERROR(lqe, "Failed to get granted for slave %s, rc=%d",
409 obd_uuid2str(uuid), rc);
410 GOTO(out_locked, rc);
412 /* recall how much space this slave currently owns in order to restore
413 * it in case of failure */
414 slv_granted_bck = slv_granted;
416 /* record current time for soft limit & grace time management */
417 now = (__u64)cfs_time_current_sec();
419 if (req_is_rel(qb_flags)) {
420 /* Slave would like to release quota space */
421 if (slv_granted < qb_count ||
422 lqe->lqe_granted < qb_count) {
423 /* can't release more than granted */
424 LQUOTA_ERROR(lqe, "Release too much! uuid:%s release:"
425 LPU64" granted:"LPU64", total:"LPU64,
426 obd_uuid2str(uuid), qb_count,
427 slv_granted, lqe->lqe_granted);
428 GOTO(out_locked, rc = -EINVAL);
/* the reply carries the amount that is being released */
431 repbody->qb_count = qb_count;
432 /* put released space back to global pool */
433 QMT_REL(lqe, slv_granted, qb_count);
434 GOTO(out_write, rc = 0);
437 if (req_has_rep(qb_flags) && slv_granted < qb_usage) {
438 /* Slave is reporting space usage in quota request and it turns
439 * out to be using more quota space than owned, so we adjust
440 * granted space regardless of the current state of affairs */
441 repbody->qb_count = qb_usage - slv_granted;
442 QMT_GRANT(lqe, slv_granted, repbody->qb_count);
/* neither acquire nor pre-acquire: nothing more to grant, go write */
445 if (!req_is_acq(qb_flags) && !req_is_preacq(qb_flags))
446 GOTO(out_write, rc = 0);
448 qmt_adjust_edquot(lqe, now);
450 /* no hope to claim further space back */
451 GOTO(out_write, rc = -EDQUOT);
453 if (qmt_space_exhausted(lqe, now)) {
454 /* might have some free space once rebalancing is completed */
455 rc = req_is_acq(qb_flags) ? -EINPROGRESS : -EDQUOT;
459 if (req_is_preacq(qb_flags)) {
460 /* slave would like to pre-acquire quota space. To do so, it
461 * reports in qb_count how much spare quota space it owns and we
462 * can grant back quota space which is consistent with qunit
465 if (qb_count >= lqe->lqe_qunit)
466 /* slave already own the maximum it should */
467 GOTO(out_write, rc = 0);
469 count = qmt_alloc_expand(lqe, slv_granted, qb_count);
471 GOTO(out_write, rc = -EDQUOT);
473 repbody->qb_count += count;
474 QMT_GRANT(lqe, slv_granted, count);
475 GOTO(out_write, rc = 0);
478 /* processing acquire request with clients waiting */
479 if (lqe->lqe_hardlimit != 0 &&
480 lqe->lqe_granted + qb_count > lqe->lqe_hardlimit) {
481 /* cannot grant as much as asked, but can still afford to grant
482 * some quota space back */
483 count = lqe->lqe_hardlimit - lqe->lqe_granted;
484 repbody->qb_count += count;
485 QMT_GRANT(lqe, slv_granted, count);
486 GOTO(out_write, rc = 0);
489 /* Whouhou! we can satisfy the slave request! */
490 repbody->qb_count += qb_count;
491 QMT_GRANT(lqe, slv_granted, qb_count);
493 /* Try to expand the acquired count for DQACQ */
494 count = qmt_alloc_expand(lqe, slv_granted, 0);
496 /* can even grant more than asked, it is like xmas ... */
497 repbody->qb_count += count;
498 QMT_GRANT(lqe, slv_granted, count);
499 GOTO(out_write, rc = 0);
502 GOTO(out_write, rc = 0);
/* empty reply: jump straight to out_locked, keeping the current rc */
504 if (repbody->qb_count == 0)
505 GOTO(out_locked, rc);
507 /* start/stop grace timer if required */
508 if (lqe->lqe_softlimit != 0) {
509 if (lqe->lqe_granted > lqe->lqe_softlimit &&
510 lqe->lqe_gracetime == 0)
511 /* first time over soft limit, let's start grace
513 lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
514 else if (lqe->lqe_granted <= lqe->lqe_softlimit &&
515 lqe->lqe_gracetime != 0)
516 /* Clear grace timer */
517 lqe->lqe_gracetime = 0;
520 /* Update slave index first since it is easier to roll back */
521 ret = qmt_slv_write(env, th, lqe, slv_obj, LQUOTA_BUMP_VER,
522 &repbody->qb_slv_ver, slv_granted);
524 /* restore initial quota settings */
525 qmt_restore(lqe, &qti->qti_restore);
527 repbody->qb_count = 0;
528 GOTO(out_locked, rc = ret);
531 /* Update global index, no version bump needed */
532 ret = qmt_glb_write(env, th, lqe, 0, NULL);
535 /* restore initial quota settings */
536 qmt_restore(lqe, &qti->qti_restore);
538 repbody->qb_count = 0;
540 /* restore previous granted value */
541 ret = qmt_slv_write(env, th, lqe, slv_obj, 0, NULL,
544 LQUOTA_ERROR(lqe, "failed to restore initial slave "
545 "value rc:%d ret%d", rc, ret);
548 qmt_adjust_edquot(lqe, now);
549 GOTO(out_locked, rc);
552 /* Total granted has been changed, let's try to adjust the qunit
553 * size according to the total granted & limits. */
554 qmt_adjust_qunit(env, lqe);
556 /* clear/set edquot flag and notify slaves via glimpse if needed */
557 qmt_adjust_edquot(lqe, now);
559 LQUOTA_DEBUG(lqe, "dqacq ends count:"LPU64" ver:"LPU64" rc:%d",
560 repbody->qb_count, repbody->qb_slv_ver, rc);
561 lqe_write_unlock(lqe);
/* stop the transaction and put the slave object only if they were obtained */
563 if (th != NULL && !IS_ERR(th))
564 dt_trans_stop(env, qmt->qmt_child, th);
566 if (slv_obj != NULL && !IS_ERR(slv_obj))
567 lu_object_put(env, &slv_obj->do_lu);
/* presumably a fault-injection hook for acquire and pre-acquire -- confirm */
569 if ((req_is_acq(qb_flags) || req_is_preacq(qb_flags)) &&
570 OBD_FAIL_CHECK(OBD_FAIL_QUOTA_EDQUOT)) {
571 /* introduce inconsistency between granted value in slave index
572 * and slave index copy of slave */
573 repbody->qb_count = 0;
581 * Handle quota request from slave.
583 * \param env - is the environment passed by the caller
584 * \param ld - is the lu device associated with the qmt
585 * \param req - is the quota acquire request
/*
 * qmt_dqacq() - validate a quota request received from a slave and hand it
 * over to qmt_dqacq0().
 *
 * Steps visible here:
 *  - fetch the request and reply quota_body buffers from req->rq_pill; the
 *    error paths return err_serious(-EPROTO) and err_serious(-EFAULT)
 *    respectively (presumably when the buffer is missing -- confirm);
 *  - check the global lock handle qbody->qb_glb_lockh;
 *  - reject, with a CERROR message, requests that set more than one of the
 *    release, acquire and pre-acquire flags;
 *  - for acquire and pre-acquire, resolve the per-ID lock handle, compare
 *    the client UUID of the lock's export with the one of the request's
 *    export and, when LDLM_FL_AST_SENT is set on the lock, refresh its
 *    waiting timeout;
 *  - decode pool id, pool type and quota type from qbody->qb_fid, look up
 *    the quota entry and process the request;
 *  - copy lqe->lqe_qunit into the reply when a per-ID lock handle was given.
 *
 * NOTE(review): the error codes returned by the individual checks, the
 * release of the lock and lqe references and the final return are not
 * visible here -- confirm against the full function.
 */
587 static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld,
588 struct ptlrpc_request *req)
590 struct qmt_device *qmt = lu2qmt_dev(ld);
591 struct quota_body *qbody, *repbody;
592 struct obd_uuid *uuid;
593 struct ldlm_lock *lock;
594 struct lquota_entry *lqe;
595 int pool_id, pool_type, qtype;
599 qbody = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_BODY);
601 RETURN(err_serious(-EPROTO));
603 repbody = req_capsule_server_get(&req->rq_pill, &RMF_QUOTA_BODY);
605 RETURN(err_serious(-EFAULT));
607 /* verify if global lock is stale */
608 if (!lustre_handle_is_used(&qbody->qb_glb_lockh))
611 lock = ldlm_handle2lock(&qbody->qb_glb_lockh);
/* the slave is identified by the client UUID of the export the request
 * arrived on */
616 uuid = &req->rq_export->exp_client_uuid;
/* release, acquire and pre-acquire are mutually exclusive */
618 if (req_is_rel(qbody->qb_flags) + req_is_acq(qbody->qb_flags) +
619 req_is_preacq(qbody->qb_flags) > 1) {
620 CERROR("%s: malformed quota request with conflicting flags set "
621 "(%x) from slave %s\n", qmt->qmt_svname,
622 qbody->qb_flags, obd_uuid2str(uuid));
626 if (req_is_acq(qbody->qb_flags) || req_is_preacq(qbody->qb_flags)) {
627 /* acquire and pre-acquire should use a valid ID lock */
629 if (!lustre_handle_is_used(&qbody->qb_lockh))
632 lock = ldlm_handle2lock(&qbody->qb_lockh);
634 /* no lock associated with this handle */
637 LDLM_DEBUG(lock, "%sacquire request",
638 req_is_preacq(qbody->qb_flags) ? "pre" : "");
640 if (!obd_uuid_equals(&lock->l_export->exp_client_uuid, uuid)) {
641 /* sorry, no way to cheat ... */
646 if ((lock->l_flags & LDLM_FL_AST_SENT) != 0) {
647 struct ptlrpc_service_part *svc;
648 unsigned int timeout;
650 svc = req->rq_rqbd->rqbd_svcpt;
/* timeout derived from svc->scp_at_estimate, never below ldlm_timeout */
651 timeout = at_est2timeout(at_get(&svc->scp_at_estimate));
652 timeout = max(timeout, ldlm_timeout);
654 /* lock is being cancelled, prolong timeout */
655 ldlm_refresh_waiting_lock(lock, timeout);
660 /* extract pool & quota information from global index FID packed in the
662 rc = lquota_extract_fid(&qbody->qb_fid, &pool_id, &pool_type, &qtype);
666 /* Find the quota entry associated with the quota id */
667 lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, pool_type, qtype,
670 RETURN(PTR_ERR(lqe));
672 /* process quota request */
673 rc = qmt_dqacq0(env, lqe, qmt, uuid, qbody->qb_flags, qbody->qb_count,
674 qbody->qb_usage, repbody);
676 if (lustre_handle_is_used(&qbody->qb_lockh))
677 /* return current qunit value only to slaves owning an per-ID
678 * quota lock. For enqueue, the qunit value will be returned in
680 repbody->qb_qunit = lqe->lqe_qunit;
685 /* Vector of quota request handlers. This vector is used by the MDT to forward
686 * requests to the quota master. */
/* qmt_quotactl() and qmt_dqacq() are the static handlers defined in this
 * file; the table itself is made available to other modules through the
 * EXPORT_SYMBOL() that follows the initializer. */
687 struct qmt_handlers qmt_hdls = {
688 /* quota request handlers */
689 .qmth_quotactl = qmt_quotactl,
690 .qmth_dqacq = qmt_dqacq,
/* intent-policy and lvbo_* callbacks; these functions are not defined in
 * the visible part of this file */
693 .qmth_intent_policy = qmt_intent_policy,
694 .qmth_lvbo_init = qmt_lvbo_init,
695 .qmth_lvbo_update = qmt_lvbo_update,
696 .qmth_lvbo_size = qmt_lvbo_size,
697 .qmth_lvbo_fill = qmt_lvbo_fill,
698 .qmth_lvbo_free = qmt_lvbo_free,
700 EXPORT_SYMBOL(qmt_hdls);