4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
32 # define EXPORT_SYMTAB
35 #define DEBUG_SUBSYSTEM S_LQUOTA
37 #include <obd_class.h>
38 #include "qmt_internal.h"
41 * Retrieve quota settings for a given identifier.
43 * \param env - is the environment passed by the caller
44 * \param qmt - is the quota master target
45 * \param pool_id - is the 16-bit pool identifier
46 * \param restype - is the pool type, either block (i.e. LQUOTA_RES_DT) or inode
47 * (i.e. LQUOTA_RES_MD)
48 * \param qtype - is the quota type
49 * \param id - is the quota identifier for which we want to access quota
51 * \param hard - is the output variable where to copy the hard limit
52 * \param soft - is the output variable where to copy the soft limit
53 * \param time - is the output variable where to copy the grace time
/*
 * qmt_get() - read the quota settings stored in one quota entry.
 *
 * The entry is located with qmt_pool_lqe_lookup() from the pool id, resource
 * type, quota type and quota id supplied by the caller. Its lqe_hardlimit,
 * lqe_softlimit and lqe_gracetime fields are then stored through the hard,
 * soft and time output pointers.
 *
 * NOTE(review): qmt_quotactl() passes NULL for hard and soft when it only
 * wants the grace time, so each store must be skipped for a NULL pointer.
 * No such guard, no error check on the lookup result, no locking and no
 * return statement are visible in this listing (the numbers embedded at the
 * start of each line skip values, so lines appear to have been dropped) -
 * confirm against the complete file before relying on this description.
 */
55 static int qmt_get(const struct lu_env *env, struct qmt_device *qmt,
56 __u16 pool_id, __u8 restype, __u8 qtype, union lquota_id *id,
57 __u64 *hard, __u64 *soft, __u64 *time)
59 struct lquota_entry *lqe;
62 /* look-up lqe structure containing quota settings */
63 lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, restype, qtype, id);
67 /* copy quota settings */
69 LQUOTA_DEBUG(lqe, "fetch settings");
71 *hard = lqe->lqe_hardlimit;
73 *soft = lqe->lqe_softlimit;
75 *time = lqe->lqe_gracetime;
83 * Update quota settings for a given identifier.
85 * \param env - is the environment passed by the caller
86 * \param qmt - is the quota master target
87 * \param pool_id - is the 16-bit pool identifier
88 * \param restype - is the pool type, either block (i.e. LQUOTA_RES_DT) or inode
89 * (i.e. LQUOTA_RES_MD)
90 * \param qtype - is the quota type
91 * \param id - is the quota identifier for which we want to modify quota
93 * \param hard - is the new hard limit
94 * \param soft - is the new soft limit
95 * \param time - is the new grace time
96 * \param valid - is the list of settings to change
/*
 * qmt_set() - change the limits and/or the grace time of one quota entry
 * and write the result to the global index.
 *
 * Sequence visible in this listing:
 *  - look up the entry with qmt_pool_lqe_lookup(); PTR_ERR(lqe) is returned
 *    on the error path;
 *  - start a transaction with qmt_trans_start(), which is also handed
 *    qti->qti_restore, the same buffer later given to qmt_restore();
 *  - when QIF_TIMES is set in valid and the value differs, store the new
 *    grace time;
 *  - when QIF_LIMITS is set in valid and either limit differs, check the
 *    pair with qmt_validate_limits(), call qmt_revalidate(), store both
 *    limits, then set lqe_gracetime to 0 if there is no soft limit or
 *    lqe_granted does not exceed it, or to now + qmt_lqe_grace() when the
 *    caller did not pass QIF_TIMES; lqe_enforced is then recomputed from
 *    the two limits (false when both are zero);
 *  - write the entry with qmt_glb_write() using LQUOTA_BUMP_VER and &ver;
 *    qmt_restore() puts the saved settings back (presumably only when the
 *    write fails - the condition is not visible here);
 *  - refresh derived state with qmt_adjust_qunit() and qmt_adjust_edquot();
 *  - release the write lock, stop the transaction if th is a valid handle
 *    and, when rc is 0 and the entry was dirtied, call
 *    qmt_glb_lock_notify() with ver.
 *
 * NOTE(review): the end of the parameter list (the valid argument), the
 * declarations of rc, now and ver, the goto labels, the lock acquisition,
 * the places where dirtied is set and the return statement are not visible
 * in this listing (the embedded line numbers skip values) - confirm against
 * the complete file.
 */
98 static int qmt_set(const struct lu_env *env, struct qmt_device *qmt,
99 __u16 pool_id, __u8 restype, __u8 qtype,
100 union lquota_id *id, __u64 hard, __u64 soft, __u64 time,
103 struct qmt_thread_info *qti = qmt_info(env);
104 struct lquota_entry *lqe;
105 struct thandle *th = NULL;
107 bool dirtied = false;
111 /* look-up quota entry associated with this ID */
112 lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, restype, qtype, id);
114 RETURN(PTR_ERR(lqe));
116 /* allocate & start transaction with enough credits to update quota
117 * settings in the global index file */
118 th = qmt_trans_start(env, lqe, &qti->qti_restore);
120 GOTO(out_nolock, rc = PTR_ERR(th));
122 now = cfs_time_current_sec();
125 LQUOTA_DEBUG(lqe, "changing quota settings valid:%x hard:"LPU64" soft:"
126 LPU64" time:"LPU64, valid, hard, soft, time);
128 if ((valid & QIF_TIMES) != 0 && lqe->lqe_gracetime != time) {
129 /* change time settings */
130 lqe->lqe_gracetime = time;
134 if ((valid & QIF_LIMITS) != 0 &&
135 (lqe->lqe_hardlimit != hard || lqe->lqe_softlimit != soft)) {
136 rc = qmt_validate_limits(lqe, hard, soft);
140 /* recompute qunit in case it was never initialized */
141 qmt_revalidate(env, lqe);
143 /* change quota limits */
144 lqe->lqe_hardlimit = hard;
145 lqe->lqe_softlimit = soft;
147 /* clear grace time */
148 if (lqe->lqe_softlimit == 0 ||
149 lqe->lqe_granted <= lqe->lqe_softlimit)
150 /* no soft limit or below soft limit, let's clear grace
152 lqe->lqe_gracetime = 0;
153 else if ((valid & QIF_TIMES) == 0)
154 /* set grace only if user hasn't provided his own */
155 lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
157 /* change enforced status based on new parameters */
158 if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0)
159 lqe->lqe_enforced = false;
161 lqe->lqe_enforced = true;
167 /* write new quota settings to disk */
168 rc = qmt_glb_write(env, th, lqe, LQUOTA_BUMP_VER, &ver);
170 /* restore initial quota settings */
171 qmt_restore(lqe, &qti->qti_restore);
175 /* compute new qunit value now that we have modified the quota
177 qmt_adjust_qunit(env, lqe);
179 /* clear/set edquot flag as needed */
180 qmt_adjust_edquot(lqe, now);
184 lqe_write_unlock(lqe);
188 if (th != NULL && !IS_ERR(th))
189 dt_trans_stop(env, qmt->qmt_child, th);
191 if (rc == 0 && dirtied)
192 qmt_glb_lock_notify(env, lqe, ver);
198 * Handle quotactl request.
200 * \param env - is the environment passed by the caller
201 * \param ld - is the lu device associated with the qmt
202 * \param oqctl - is the quotactl request
/*
 * qmt_quotactl() - dispatch one quotactl request on the quota master.
 *
 * The request is rejected when qc_type is not below MAXQUOTAS. Otherwise
 * the command in qc_cmd selects one of the following, always on pool id 0:
 *
 *  Q_GETINFO   read the inode (LQUOTA_RES_MD) and then the block
 *              (LQUOTA_RES_DT) grace time with qmt_get() into
 *              qc_dqinfo.dqi_igrace and qc_dqinfo.dqi_bgrace; hard and
 *              soft are passed as NULL.
 *  Q_SETINFO   for each of QIF_ITIME and QIF_BTIME set in dqb_valid, store
 *              dqi_igrace or dqi_bgrace with qmt_set(), passing 0 for both
 *              limits. The flags are read from dqb_valid because, per the
 *              comment in the body, lfs sets them there.
 *  Q_GETQUOTA  for id 0 return a zeroed dqblk with QIF_LIMITS | QIF_TIMES
 *              marked valid; otherwise copy qc_id into id->qid_uid, fetch
 *              the inode and then the block settings with qmt_get(), OR
 *              the matching QIF_* bits into dqb_valid, and report
 *              dqb_curinodes and dqb_curspace as 0.
 *  Q_SETQUOTA  id 0 is refused; otherwise copy qc_id into id->qid_uid and
 *              call qmt_set() for the inode settings when any QIF_IFLAGS
 *              bit is valid and for the block settings when any QIF_BFLAGS
 *              bit is valid, passing only that subset of dqb_valid.
 *  Q_QUOTAOFF, LUSTRE_Q_INVALIDATE
 *              handling is not visible here; the comments say quota is
 *              always on for the master and that invalidate is no longer
 *              supported.
 *  other       log the unsupported command with CERROR.
 *
 * NOTE(review): the declaration of rc, the error codes returned, the break
 * statements, the rc checks between paired qmt_get()/qmt_set() calls, the
 * code that presumably selects ID 0 for the grace-time commands and the
 * final return are not visible in this listing (the embedded line numbers
 * skip values) - confirm against the complete file.
 */
204 static int qmt_quotactl(const struct lu_env *env, struct lu_device *ld,
205 struct obd_quotactl *oqctl)
207 struct qmt_thread_info *qti = qmt_info(env);
208 union lquota_id *id = &qti->qti_id;
209 struct qmt_device *qmt = lu2qmt_dev(ld);
210 struct obd_dqblk *dqb = &oqctl->qc_dqblk;
214 LASSERT(qmt != NULL);
216 if (oqctl->qc_type >= MAXQUOTAS)
217 /* invalid quota type */
220 switch (oqctl->qc_cmd) {
222 case Q_GETINFO: /* read grace times */
223 /* Global grace time is stored in quota settings of ID 0. */
226 /* read inode grace time */
227 rc = qmt_get(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type, id,
228 NULL, NULL, &oqctl->qc_dqinfo.dqi_igrace);
232 /* read block grace time */
233 rc = qmt_get(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type, id,
234 NULL, NULL, &oqctl->qc_dqinfo.dqi_bgrace);
237 case Q_SETINFO: /* modify grace times */
238 /* setinfo should be using dqi->dqi_valid, but lfs incorrectly
239 * sets the valid flags in dqb->dqb_valid instead, try to live
242 /* Global grace time is stored in quota settings of ID 0. */
245 if ((dqb->dqb_valid & QIF_ITIME) != 0) {
246 /* set inode grace time */
247 rc = qmt_set(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type,
248 id, 0, 0, oqctl->qc_dqinfo.dqi_igrace,
254 if ((dqb->dqb_valid & QIF_BTIME) != 0)
255 /* set block grace time */
256 rc = qmt_set(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type,
257 id, 0, 0, oqctl->qc_dqinfo.dqi_bgrace,
261 case Q_GETQUOTA: /* consult quota limit */
262 /* There is no quota limit for root user & group */
263 if (oqctl->qc_id == 0) {
264 memset(dqb, 0, sizeof(*dqb));
265 dqb->dqb_valid = QIF_LIMITS | QIF_TIMES;
268 /* extract quota ID from quotactl request */
269 id->qid_uid = oqctl->qc_id;
271 /* look-up inode quota settings */
272 rc = qmt_get(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type, id,
273 &dqb->dqb_ihardlimit, &dqb->dqb_isoftlimit,
278 dqb->dqb_valid |= QIF_ILIMITS | QIF_ITIME;
279 /* master isn't aware of actual inode usage */
280 dqb->dqb_curinodes = 0;
282 /* look-up block quota settings */
283 rc = qmt_get(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type, id,
284 &dqb->dqb_bhardlimit, &dqb->dqb_bsoftlimit,
289 dqb->dqb_valid |= QIF_BLIMITS | QIF_BTIME;
290 /* master doesn't know the actual block usage */
291 dqb->dqb_curspace = 0;
294 case Q_SETQUOTA: /* change quota limits */
295 if (oqctl->qc_id == 0)
296 /* can't enforce a quota limit for root user & group */
298 /* extract quota ID from quotactl request */
299 id->qid_uid = oqctl->qc_id;
301 if ((dqb->dqb_valid & QIF_IFLAGS) != 0) {
302 /* update inode quota settings */
303 rc = qmt_set(env, qmt, 0, LQUOTA_RES_MD, oqctl->qc_type,
304 id, dqb->dqb_ihardlimit,
305 dqb->dqb_isoftlimit, dqb->dqb_itime,
306 dqb->dqb_valid & QIF_IFLAGS);
311 if ((dqb->dqb_valid & QIF_BFLAGS) != 0)
312 /* update block quota settings */
313 rc = qmt_set(env, qmt, 0, LQUOTA_RES_DT, oqctl->qc_type,
314 id, dqb->dqb_bhardlimit,
315 dqb->dqb_bsoftlimit, dqb->dqb_btime,
316 dqb->dqb_valid & QIF_BFLAGS);
320 case Q_QUOTAOFF: /* quota is always turned on on the master */
323 case LUSTRE_Q_INVALIDATE: /* not supported any more */
327 CERROR("%s: unsupported quotactl command: %d\n",
328 qmt->qmt_svname, oqctl->qc_cmd);
336 * Helper function to handle quota request from slave.
338 * \param env - is the environment passed by the caller
339 * \param lqe - is the lquota_entry subject to the quota request
340 * \param qmt - is the master device
341 * \param uuid - is the uuid associated with the slave
342 * \param qb_flags - are the quota request flags as packed in the quota_body
343 * \param qb_count - is the amount of quota space the slave wants to
345 * \param qb_usage - is the current space usage on the slave
346 * \param repbody - is the quota_body of reply
348 * \retval 0 : success
349 * \retval -EDQUOT : out of quota
350 * -EINPROGRESS : inform client to retry write/create
351 * -ve : other appropriate errors
/*
 * qmt_dqacq0() - process one acquire, pre-acquire, release or usage-report
 * request from a slave against a quota entry.
 *
 * Flow visible in this listing:
 *  1. Zero the reply and copy lqe->lqe_id into repbody->qb_id.
 *  2. Fault injection: when OBD_FAIL_QUOTA_RECOVERABLE_ERR fires, return
 *     -cfs_fail_val.
 *  3. Find the slave index object with lquota_disk_slv_find(), copy its
 *     fid into repbody->qb_slv_fid and start a transaction with
 *     qmt_trans_start_with_slv(), which is also handed qti->qti_restore.
 *  4. An entry that is not enforced answers any non-release request with
 *     -ESRCH. NOTE(review): the comment on that branch talks about
 *     EINPROGRESS while the code sets -ESRCH - one of the two looks stale.
 *  5. qmt_revalidate() is called; an acquire with qb_count == 0 stops here
 *     with rc 0 (the slave only wants the per-ID lock).
 *  6. Read what the slave already owns with qmt_slv_read() and keep a copy
 *     in slv_granted_bck for roll back.
 *  7. Release: refuse with -EINVAL when qb_count exceeds slv_granted or
 *     lqe_granted, otherwise report qb_count in the reply and return the
 *     space with QMT_REL.
 *  8. Usage report: when the request carries a report and the slave uses
 *     more than it owns, grant the difference with QMT_GRANT.
 *  9. Requests that are neither acquire nor pre-acquire go straight to the
 *     write step with rc 0.
 * 10. After qmt_adjust_edquot(), -EDQUOT is set on a path whose condition
 *     is not visible; when qmt_space_exhausted() is true, rc becomes
 *     -EINPROGRESS for an acquire and -EDQUOT otherwise.
 * 11. Pre-acquire: nothing more is granted when qb_count (the spare space
 *     reported by the slave) is already at least lqe_qunit; otherwise the
 *     amount comes from qmt_alloc_expand().
 * 12. Acquire: when a hard limit is set and lqe_granted + qb_count would
 *     pass it, grant only hardlimit - granted; otherwise grant qb_count
 *     and then whatever qmt_alloc_expand(lqe, slv_granted, 0) returns.
 * 13. Write step: skipped when repbody->qb_count is 0. With a soft limit
 *     set, the grace timer is started on first crossing and cleared when
 *     back at or under the limit. The slave index is written first with
 *     qmt_slv_write() (LQUOTA_BUMP_VER), then the global index with
 *     qmt_glb_write(). On a failed write the settings are put back with
 *     qmt_restore() and qb_count is zeroed; after a failed global write
 *     the slave index is written again (presumably with slv_granted_bck -
 *     the argument line is not visible).
 * 14. qmt_adjust_qunit() and qmt_adjust_edquot() run, the write lock is
 *     released, the transaction is stopped and the slave object is put.
 * 15. Fault injection: when OBD_FAIL_QUOTA_EDQUOT fires on an acquire or
 *     pre-acquire, qb_count in the reply is zeroed.
 *
 * NOTE(review): the declarations of rc, ret, now and count, all goto
 * labels, the lock acquisition, several error-check conditions and the
 * final return are not visible in this listing (the embedded line numbers
 * skip values) - confirm against the complete file.
 */
353 int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
354 struct qmt_device *qmt, struct obd_uuid *uuid, __u32 qb_flags,
355 __u64 qb_count, __u64 qb_usage, struct quota_body *repbody)
357 struct qmt_thread_info *qti = qmt_info(env);
359 struct dt_object *slv_obj = NULL;
360 __u64 slv_granted, slv_granted_bck;
361 struct thandle *th = NULL;
365 LASSERT(uuid != NULL);
367 /* initialize reply */
368 memset(repbody, 0, sizeof(*repbody));
369 memcpy(&repbody->qb_id, &lqe->lqe_id, sizeof(repbody->qb_id));
371 if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RECOVERABLE_ERR))
372 RETURN(-cfs_fail_val);
374 /* look-up index file associated with acquiring slave */
375 slv_obj = lquota_disk_slv_find(env, qmt->qmt_child, LQE_ROOT(lqe),
376 lu_object_fid(&LQE_GLB_OBJ(lqe)->do_lu),
379 GOTO(out, rc = PTR_ERR(slv_obj));
381 /* pack slave fid in reply just for sanity check */
382 memcpy(&repbody->qb_slv_fid, lu_object_fid(&slv_obj->do_lu),
383 sizeof(struct lu_fid));
385 /* allocate & start transaction with enough credits to update
386 * global & slave indexes */
387 th = qmt_trans_start_with_slv(env, lqe, slv_obj, &qti->qti_restore);
389 GOTO(out, rc = PTR_ERR(th));
392 LQUOTA_DEBUG(lqe, "dqacq starts uuid:%s flags:0x%x wanted:"LPU64
393 " usage:"LPU64, obd_uuid2str(uuid), qb_flags, qb_count,
396 /* Legal race, limits have been removed on master, but slave didn't
397 * receive the change yet. Just return EINPROGRESS until the slave gets
399 if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
400 GOTO(out_locked, rc = -ESRCH);
402 /* recompute qunit in case it was never initialized */
403 qmt_revalidate(env, lqe);
405 /* slave just wants to acquire per-ID lock */
406 if (req_is_acq(qb_flags) && qb_count == 0)
407 GOTO(out_locked, rc = 0);
409 /* fetch how much quota space is already granted to this slave */
410 rc = qmt_slv_read(env, lqe, slv_obj, &slv_granted);
412 LQUOTA_ERROR(lqe, "Failed to get granted for slave %s, rc=%d",
413 obd_uuid2str(uuid), rc);
414 GOTO(out_locked, rc);
416 /* recall how much space this slave currently owns in order to restore
417 * it in case of failure */
418 slv_granted_bck = slv_granted;
420 /* record current time for soft limit & grace time management */
421 now = (__u64)cfs_time_current_sec();
423 if (req_is_rel(qb_flags)) {
424 /* Slave would like to release quota space */
425 if (slv_granted < qb_count ||
426 lqe->lqe_granted < qb_count) {
427 /* can't release more than granted */
428 LQUOTA_ERROR(lqe, "Release too much! uuid:%s release:"
429 LPU64" granted:"LPU64", total:"LPU64,
430 obd_uuid2str(uuid), qb_count,
431 slv_granted, lqe->lqe_granted);
432 GOTO(out_locked, rc = -EINVAL);
435 repbody->qb_count = qb_count;
436 /* put released space back to global pool */
437 QMT_REL(lqe, slv_granted, qb_count);
438 GOTO(out_write, rc = 0);
441 if (req_has_rep(qb_flags) && slv_granted < qb_usage) {
442 /* Slave is reporting space usage in quota request and it turns
443 * out to be using more quota space than owned, so we adjust
444 * granted space regardless of the current state of affairs */
445 repbody->qb_count = qb_usage - slv_granted;
446 QMT_GRANT(lqe, slv_granted, repbody->qb_count);
449 if (!req_is_acq(qb_flags) && !req_is_preacq(qb_flags))
450 GOTO(out_write, rc = 0);
452 qmt_adjust_edquot(lqe, now);
454 /* no hope to claim further space back */
455 GOTO(out_write, rc = -EDQUOT);
457 if (qmt_space_exhausted(lqe, now)) {
458 /* might have some free space once rebalancing is completed */
459 rc = req_is_acq(qb_flags) ? -EINPROGRESS : -EDQUOT;
463 if (req_is_preacq(qb_flags)) {
464 /* slave would like to pre-acquire quota space. To do so, it
465 * reports in qb_count how much spare quota space it owns and we
466 * can grant back quota space which is consistent with qunit
469 if (qb_count >= lqe->lqe_qunit)
470 /* slave already own the maximum it should */
471 GOTO(out_write, rc = 0);
473 count = qmt_alloc_expand(lqe, slv_granted, qb_count);
475 GOTO(out_write, rc = -EDQUOT);
477 repbody->qb_count += count;
478 QMT_GRANT(lqe, slv_granted, count);
479 GOTO(out_write, rc = 0);
482 /* processing acquire request with clients waiting */
483 if (lqe->lqe_hardlimit != 0 &&
484 lqe->lqe_granted + qb_count > lqe->lqe_hardlimit) {
485 /* cannot grant as much as asked, but can still afford to grant
486 * some quota space back */
487 count = lqe->lqe_hardlimit - lqe->lqe_granted;
488 repbody->qb_count += count;
489 QMT_GRANT(lqe, slv_granted, count);
490 GOTO(out_write, rc = 0);
493 /* Whouhou! we can satisfy the slave request! */
494 repbody->qb_count += qb_count;
495 QMT_GRANT(lqe, slv_granted, qb_count);
497 /* Try to expand the acquired count for DQACQ */
498 count = qmt_alloc_expand(lqe, slv_granted, 0);
500 /* can even grant more than asked, it is like xmas ... */
501 repbody->qb_count += count;
502 QMT_GRANT(lqe, slv_granted, count);
503 GOTO(out_write, rc = 0);
506 GOTO(out_write, rc = 0);
508 if (repbody->qb_count == 0)
509 GOTO(out_locked, rc);
511 /* start/stop grace timer if required */
512 if (lqe->lqe_softlimit != 0) {
513 if (lqe->lqe_granted > lqe->lqe_softlimit &&
514 lqe->lqe_gracetime == 0)
515 /* first time over soft limit, let's start grace
517 lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
518 else if (lqe->lqe_granted <= lqe->lqe_softlimit &&
519 lqe->lqe_gracetime != 0)
520 /* Clear grace timer */
521 lqe->lqe_gracetime = 0;
524 /* Update slave index first since it is easier to roll back */
525 ret = qmt_slv_write(env, th, lqe, slv_obj, LQUOTA_BUMP_VER,
526 &repbody->qb_slv_ver, slv_granted);
528 /* restore initial quota settings */
529 qmt_restore(lqe, &qti->qti_restore);
531 repbody->qb_count = 0;
532 GOTO(out_locked, rc = ret);
535 /* Update global index, no version bump needed */
536 ret = qmt_glb_write(env, th, lqe, 0, NULL);
539 /* restore initial quota settings */
540 qmt_restore(lqe, &qti->qti_restore);
542 repbody->qb_count = 0;
544 /* restore previous granted value */
545 ret = qmt_slv_write(env, th, lqe, slv_obj, 0, NULL,
548 LQUOTA_ERROR(lqe, "failed to restore initial slave "
549 "value rc:%d ret%d", rc, ret);
552 qmt_adjust_edquot(lqe, now);
553 GOTO(out_locked, rc);
556 /* Total granted has been changed, let's try to adjust the qunit
557 * size according to the total granted & limits. */
558 qmt_adjust_qunit(env, lqe);
560 /* clear/set edquot flag and notify slaves via glimpse if needed */
561 qmt_adjust_edquot(lqe, now);
563 LQUOTA_DEBUG(lqe, "dqacq ends count:"LPU64" ver:"LPU64" rc:%d",
564 repbody->qb_count, repbody->qb_slv_ver, rc);
565 lqe_write_unlock(lqe);
567 if (th != NULL && !IS_ERR(th))
568 dt_trans_stop(env, qmt->qmt_child, th);
570 if (slv_obj != NULL && !IS_ERR(slv_obj))
571 lu_object_put(env, &slv_obj->do_lu);
573 if ((req_is_acq(qb_flags) || req_is_preacq(qb_flags)) &&
574 OBD_FAIL_CHECK(OBD_FAIL_QUOTA_EDQUOT)) {
575 /* introduce inconsistency between granted value in slave index
576 * and slave index copy of slave */
577 repbody->qb_count = 0;
585 * Handle quota request from slave.
587 * \param env - is the environment passed by the caller
588 * \param ld - is the lu device associated with the qmt
589 * \param req - is the quota acquire request
/*
 * qmt_dqacq() - validate a quota request received from a slave and hand it
 * to qmt_dqacq0().
 *
 * Steps visible in this listing:
 *  - fetch the request body from the client side of the capsule and the
 *    reply body from the server side; the error paths return
 *    err_serious(-EPROTO) and err_serious(-EFAULT) respectively;
 *  - check that the global lock handle qb_glb_lockh is in use and resolve
 *    it with ldlm_handle2lock();
 *  - take the slave uuid from req->rq_export->exp_client_uuid;
 *  - reject, with a CERROR, a request that sets more than one of the
 *    release, acquire and pre-acquire flags;
 *  - for acquire and pre-acquire, require the per-ID handle qb_lockh to be
 *    in use, resolve it, and compare the uuid of the export owning the
 *    lock with the requesting uuid; when the lock has LDLM_FL_AST_SENT set
 *    (it is being cancelled, per the comment), refresh its waiting timeout
 *    to the larger of the adaptive estimate for the service partition and
 *    ldlm_timeout;
 *  - decode pool id, pool type and quota type from qb_fid with
 *    lquota_extract_fid() and look up the quota entry;
 *  - call qmt_dqacq0() with the flags, count and usage from the request;
 *  - when qb_lockh is in use, copy lqe->lqe_qunit into the reply.
 *
 * NOTE(review): the declaration of rc, the NULL and error checks, the
 * error codes for the lock checks, the release of the lock references
 * obtained from ldlm_handle2lock() and of the quota entry, and the final
 * return are not visible in this listing (the embedded line numbers skip
 * values) - confirm against the complete file.
 */
591 static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld,
592 struct ptlrpc_request *req)
594 struct qmt_device *qmt = lu2qmt_dev(ld);
595 struct quota_body *qbody, *repbody;
596 struct obd_uuid *uuid;
597 struct ldlm_lock *lock;
598 struct lquota_entry *lqe;
599 int pool_id, pool_type, qtype;
603 qbody = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_BODY);
605 RETURN(err_serious(-EPROTO));
607 repbody = req_capsule_server_get(&req->rq_pill, &RMF_QUOTA_BODY);
609 RETURN(err_serious(-EFAULT));
611 /* verify if global lock is stale */
612 if (!lustre_handle_is_used(&qbody->qb_glb_lockh))
615 lock = ldlm_handle2lock(&qbody->qb_glb_lockh);
620 uuid = &req->rq_export->exp_client_uuid;
622 if (req_is_rel(qbody->qb_flags) + req_is_acq(qbody->qb_flags) +
623 req_is_preacq(qbody->qb_flags) > 1) {
624 CERROR("%s: malformed quota request with conflicting flags set "
625 "(%x) from slave %s\n", qmt->qmt_svname,
626 qbody->qb_flags, obd_uuid2str(uuid));
630 if (req_is_acq(qbody->qb_flags) || req_is_preacq(qbody->qb_flags)) {
631 /* acquire and pre-acquire should use a valid ID lock */
633 if (!lustre_handle_is_used(&qbody->qb_lockh))
636 lock = ldlm_handle2lock(&qbody->qb_lockh);
638 /* no lock associated with this handle */
641 LDLM_DEBUG(lock, "%sacquire request",
642 req_is_preacq(qbody->qb_flags) ? "pre" : "");
644 if (!obd_uuid_equals(&lock->l_export->exp_client_uuid, uuid)) {
645 /* sorry, no way to cheat ... */
650 if ((lock->l_flags & LDLM_FL_AST_SENT) != 0) {
651 struct ptlrpc_service_part *svc;
652 unsigned int timeout;
654 svc = req->rq_rqbd->rqbd_svcpt;
655 timeout = at_est2timeout(at_get(&svc->scp_at_estimate));
656 timeout = max(timeout, ldlm_timeout);
658 /* lock is being cancelled, prolong timeout */
659 ldlm_refresh_waiting_lock(lock, timeout);
664 /* extract pool & quota information from global index FID packed in the
666 rc = lquota_extract_fid(&qbody->qb_fid, &pool_id, &pool_type, &qtype);
670 /* Find the quota entry associated with the quota id */
671 lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, pool_type, qtype,
674 RETURN(PTR_ERR(lqe));
676 /* process quota request */
677 rc = qmt_dqacq0(env, lqe, qmt, uuid, qbody->qb_flags, qbody->qb_count,
678 qbody->qb_usage, repbody);
680 if (lustre_handle_is_used(&qbody->qb_lockh))
681 /* return current qunit value only to slaves owning an per-ID
682 * quota lock. For enqueue, the qunit value will be returned in
684 repbody->qb_qunit = lqe->lqe_qunit;
689 /* Vector of quota request handlers. This vector is used by the MDT to forward
690 * requests to the quota master. */
/*
 * qmt_hdls - table of entry points exported by the quota master.
 *
 * It binds the two request handlers defined in this file (qmt_quotactl and
 * qmt_dqacq) and the intent-policy and lvbo_* callbacks, which are defined
 * elsewhere, to the fields of struct qmt_handlers. The symbol is exported
 * with EXPORT_SYMBOL() so that other modules can reference it.
 *
 * NOTE(review): the closing line of the initializer is not visible in this
 * listing (the embedded line numbers skip values).
 */
691 struct qmt_handlers qmt_hdls = {
692 /* quota request handlers */
693 .qmth_quotactl = qmt_quotactl,
694 .qmth_dqacq = qmt_dqacq,
/* intent policy and lvbo_* callbacks */
697 .qmth_intent_policy = qmt_intent_policy,
698 .qmth_lvbo_init = qmt_lvbo_init,
699 .qmth_lvbo_update = qmt_lvbo_update,
700 .qmth_lvbo_size = qmt_lvbo_size,
701 .qmth_lvbo_fill = qmt_lvbo_fill,
702 .qmth_lvbo_free = qmt_lvbo_free,
704 EXPORT_SYMBOL(qmt_hdls);