4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann@whamcloud.com>
28 * Author: Niu Yawei <niu@whamcloud.com>
31 #include <dt_object.h>
32 #include <lustre_quota.h>
33 #include "osd_internal.h"
36 * Helper function to find out the quota type (USRQUOTA/GRPQUOTA) of a
/*
 * Translate the object id of an accounting FID into a quota type.
 * The caller must pass an accounting FID (asserted below).
 * NOTE(review): the return statements and the other case labels of the
 * switch are not visible in this listing.
 */
39 static inline int fid2type(const struct lu_fid *fid)
41 LASSERT(fid_is_acct(fid));
/* dispatch on the object id embedded in the FID */
42 switch (fid_oid(fid)) {
47 case ACCT_PROJECT_OID:
/* unconditional assertion failure; presumably reached only when no case matched */
51 LASSERTF(0, "invalid fid for quota type: %u\n", fid_oid(fid));
56 * Space Accounting Management
60 * Look up an accounting object based on its fid.
62 * \param info - is the osd thread info passed by the caller
63 * \param osd - is the osd device
64 * \param fid - is the fid of the accounting object we want to look up
65 * \param id - is the osd_inode_id struct to fill with the inode number of
66 * the quota file if the lookup is successful
/*
 * Fill \a id with the inode number of the quota file that backs the
 * accounting object \a fid, read from the ldiskfs on-disk superblock.
 * NOTE(review): the return statements and the case labels of the switch
 * are not visible in this listing, so the exact error codes are not
 * documented here.
 */
68 int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd,
69 const struct lu_fid *fid, struct osd_inode_id *id)
71 struct super_block *sb = osd_sb(osd);
74 LASSERT(fid_is_acct(fid));
/* special-case a filesystem without the quota feature */
76 if (!ldiskfs_has_feature_quota(sb))
80 * ldiskfs won't load quota inodes on RO mount,
81 * So disable it in osd-ldiskfs to keep same behavior
82 * like lower layer to avoid further confusions.
84 if (osd->od_dt_dev.dd_rdonly)
87 id->oii_gen = OSD_OII_NOGEN;
/* the quota type selects which superblock field holds the inode number */
88 switch (fid2type(fid)) {
/* superblock fields are stored little-endian, hence le32_to_cpu() */
91 le32_to_cpu(LDISKFS_SB(sb)->s_es->s_usr_quota_inum);
95 le32_to_cpu(LDISKFS_SB(sb)->s_es->s_grp_quota_inum);
/*
 * The project quota inode is only read when the build defines
 * HAVE_PROJECT_QUOTA and the filesystem has the project feature.
 */
98 #ifdef HAVE_PROJECT_QUOTA
99 if (ldiskfs_has_feature_project(sb)) {
102 prj_quota = LDISKFS_SB(sb)->s_es->s_prj_quota_inum;
103 id->oii_ino = le32_to_cpu(prj_quota);
/*
 * Final sanity check: the inode number must either pass
 * ldiskfs_valid_inum() or be one of the two reserved user/group quota
 * inode numbers.
 */
110 if (!ldiskfs_valid_inum(sb, id->oii_ino) &&
111 id->oii_ino != LDISKFS_USR_QUOTA_INO &&
112 id->oii_ino != LDISKFS_GRP_QUOTA_INO)
119 * Return space usage (#blocks & #inodes) consumed by a given uid or gid.
121 * \param env - is the environment passed by the caller
122 * \param dtobj - is the accounting object
123 * \param dtrec - is the record to fill with space usage information
124 * \param dtkey - is the id of the user or group for which we would
125 * like to access disk usage.
127 * \retval +ve - success : exact match
128 * \retval -ve - failure
/*
 * dio_lookup method of the accounting index: ask the filesystem's quota
 * operations for the usage of the id stored in \a dtkey and copy the
 * block and inode usage into \a dtrec.
 * NOTE(review): the local declarations of type/qid/rc, the error check
 * after get_dqblk() and the #else/#endif lines are not visible in this
 * listing.
 */
130 static int osd_acct_index_lookup(const struct lu_env *env,
131 struct dt_object *dtobj,
132 struct dt_rec *dtrec,
133 const struct dt_key *dtkey)
135 struct osd_thread_info *info = osd_oti_get(env);
/* the dqblk buffer lives in the thread info; its type depends on HAVE_DQUOT_QC_DQBLK */
136 #if defined(HAVE_DQUOT_QC_DQBLK)
137 struct qc_dqblk *dqblk = &info->oti_qdq;
139 struct fs_disk_quota *dqblk = &info->oti_fdq;
141 struct super_block *sb = osd_sb(osd_obj2dev(osd_dt_obj(dtobj)));
142 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
/* the key is read as a 64-bit id */
143 __u64 id = *((__u64 *)dtkey);
/* the quota type comes from the FID of the accounting object itself */
150 type = fid2type(lu_object_fid(&dtobj->do_lu));
151 memset(dqblk, 0, sizeof(*dqblk));
152 qid = make_kqid(&init_user_ns, type, id);
/* fetch the usage through the superblock's quotactl operations */
153 rc = sb->s_qcop->get_dqblk(sb, qid, dqblk);
/* the two dqblk layouts name the usage fields differently */
156 #if defined(HAVE_DQUOT_QC_DQBLK)
157 rec->bspace = dqblk->d_space;
158 rec->ispace = dqblk->d_ino_count;
160 rec->bspace = dqblk->d_bcount;
161 rec->ispace = dqblk->d_icount;
/*
 * Report a failed quota read through CERROR. The message carries the
 * od_svname of the device that owns the iterated object and the error
 * code \a rc. \a it must be a struct osd_it_quota pointer.
 */
166 #define QUOTA_IT_READ_ERROR(it, rc) \
167 CERROR("%s: Error while trying to read quota information, " \
168 "failed with %d\n", \
169 osd_dev(it->oiq_obj->oo_dt.do_lu.lo_dev)->od_svname, rc) \
172 * Initialize osd Iterator for given osd index object.
174 * \param dt - osd index object
175 * \param attr - not used
/*
 * .init method of the accounting iterator: set up a struct osd_it_quota
 * for \a dt and return it cast to struct dt_it, or ERR_PTR(-ENOMEM).
 * NOTE(review): the rest of the parameter list, the allocation of the
 * iterator and the code that attaches the object to it are not visible
 * in this listing.
 */
177 static struct dt_it *osd_it_acct_init(const struct lu_env *env,
178 struct dt_object *dt,
181 struct osd_it_quota *it;
182 struct lu_object *lo = &dt->do_lu;
183 struct osd_object *obj = osd_dt_obj(dt);
/* the object must already exist */
187 LASSERT(lu_object_exists(lo));
191 RETURN(ERR_PTR(-ENOMEM));
/* oiq_list collects the leaf records appended by osd_it_add_processed() */
195 INIT_LIST_HEAD(&it->oiq_list);
197 /* LUSTRE_DQTREEOFF is the initial offset where the tree can be found */
198 it->oiq_blk[0] = LUSTRE_DQTREEOFF;
201 * NB: we don't need to store the tree depth since it is always
202 * equal to LUSTRE_DQTREEDEPTH - 1 (root has depth = 0) for a leaf
205 RETURN((struct dt_it *)it);
209 * Free given iterator.
211 * \param di - osd iterator
/*
 * .fini method of the accounting iterator: call osd_object_put() on the
 * iterated object and unlink every leaf record from it->oiq_list.
 * NOTE(review): the statements that free the leaf records and the
 * iterator itself are not visible in this listing.
 */
213 static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
215 struct osd_it_quota *it = (struct osd_it_quota *)di;
216 struct osd_quota_leaf *leaf, *tmp;
220 osd_object_put(env, it->oiq_obj);
/* the _safe variant is needed because entries are removed while walking */
222 list_for_each_entry_safe(leaf, tmp, &it->oiq_list, oql_link) {
223 list_del_init(&leaf->oql_link);
233 * Move the iterator to the record specified by \a key; if \a key isn't found,
234 * move to the first valid record.
236 * \param di - osd iterator
237 * \param key - uid or gid
239 * \retval +ve - di points to the first valid record
240 * \retval +1 - di points to exact matched key
241 * \retval -ve - failure
/*
 * .get method of the accounting iterator: search the quota tree for the
 * id stored in \a key and, when it is not found, walk the tree for the
 * first valid entry.
 * NOTE(review): the local declarations, the return statements and the
 * tail of the walk_tree_dqentry() call are not visible in this listing.
 */
243 static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
244 const struct dt_key *key)
246 struct osd_it_quota *it = (struct osd_it_quota *)di;
247 const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
/* the key is read as a qid_t */
249 qid_t dqid = *(qid_t *)key;
254 type = fid2type(fid);
/* exact-match search starting at the tree root (LUSTRE_DQTREEOFF, depth 0) */
256 offset = find_tree_dqentry(env, it->oiq_obj, type, dqid,
257 LUSTRE_DQTREEOFF, 0, it);
258 if (offset > 0) { /* Found */
260 } else if (offset < 0) { /* Error */
261 QUOTA_IT_READ_ERROR(it, (int)offset);
265 /* The @key is not found, move to the first valid entry */
266 rc = walk_tree_dqentry(env, it->oiq_obj, type, it->oiq_blk[0], 0,
279 * \param di - osd iterator
/*
 * .put method of the accounting iterator.
 * NOTE(review): the body is not visible in this listing.
 */
281 static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
/*
 * Append a new osd_quota_leaf holding the block number it->oiq_blk[depth]
 * to the tail of it->oiq_list.
 * NOTE(review): the allocation of the leaf, its failure handling and the
 * return statements are not visible in this listing.
 */
286 static int osd_it_add_processed(struct osd_it_quota *it, int depth)
288 struct osd_quota_leaf *leaf;
293 INIT_LIST_HEAD(&leaf->oql_link);
294 leaf->oql_blk = it->oiq_blk[depth];
295 list_add_tail(&leaf->oql_link, &it->oiq_list);
300 * Move on to the next valid entry.
302 * \param di - osd iterator
304 * \retval +ve - iterator reached the end
305 * \retval 0 - iterator has not reached the end yet
306 * \retval -ve - unexpected failure
/*
 * .next method of the accounting iterator: look for the next valid entry
 * in the current leaf block first, then continue with the rest of the
 * quota tree.
 * NOTE(review): the local declarations, several branch bodies and the
 * return statements are not visible in this listing.
 */
308 static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
310 struct osd_it_quota *it = (struct osd_it_quota *)di;
311 const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
318 type = fid2type(fid);
321 * Let's first check if there are any remaining valid entry in the
322 * current leaf block. Start with the next entry after the current one.
324 depth = LUSTRE_DQTREEDEPTH;
325 index = it->oiq_index[depth];
/* only scan the leaf when a slot remains after the current one */
326 if (++index < LUSTRE_DQSTRINBLK) {
327 /* Search for the next valid entry from current index */
328 rc = walk_block_dqentry(env, it->oiq_obj, type,
329 it->oiq_blk[depth], index, it);
331 QUOTA_IT_READ_ERROR(it, rc);
333 } else if (rc == 0) {
335 * Found on entry, @it is already updated to the
336 * new position in walk_block_dqentry().
/* record the current leaf block in it->oiq_list */
340 rc = osd_it_add_processed(it, depth);
345 rc = osd_it_add_processed(it, depth);
352 * We have consumed all the entries of the current leaf block, move on
358 * We keep searching as long as walk_tree_dqentry() returns +1
359 * (= no valid entry found).
/* walk from the current depth towards depth 0 while rc stays positive */
361 for (; depth >= 0 && rc > 0; depth--) {
362 index = it->oiq_index[depth];
365 rc = walk_tree_dqentry(env, it->oiq_obj, type,
366 it->oiq_blk[depth], depth, index, it);
370 QUOTA_IT_READ_ERROR(it, rc);
375 * Return pointer to the key under iterator.
377 * \param di - osd iterator
/*
 * .key method of the accounting iterator: return the address of the
 * iterator's oiq_id field, cast to struct dt_key. The pointer refers to
 * storage inside the iterator itself.
 */
379 static struct dt_key *osd_it_acct_key(const struct lu_env *env,
380 const struct dt_it *di)
382 struct osd_it_quota *it = (struct osd_it_quota *)di;
385 RETURN((struct dt_key *)&it->oiq_id);
389 * Return size of key under iterator (in bytes)
391 * \param di - osd iterator
/*
 * .key_size method of the accounting iterator: return the size in bytes
 * of the iterator's oiq_id field.
 */
393 static int osd_it_acct_key_size(const struct lu_env *env,
394 const struct dt_it *di)
396 struct osd_it_quota *it = (struct osd_it_quota *)di;
399 RETURN((int)sizeof(it->oiq_id));
403 * Fill \a dtrec with the record under iterator.
405 * \param di - osd iterator
406 * \param attr - not used
/*
 * .rec method of the accounting iterator: look up the usage record for
 * the iterator's current key and store it in \a dtrec.
 * A positive result from osd_acct_index_lookup() is reported as 0; zero
 * and negative results are returned unchanged.
 */
408 static int osd_it_acct_rec(const struct lu_env *env,
409 const struct dt_it *di,
410 struct dt_rec *dtrec, __u32 attr)
412 struct osd_it_quota *it = (struct osd_it_quota *)di;
413 const struct dt_key *key = osd_it_acct_key(env, di);
418 rc = osd_acct_index_lookup(env, &it->oiq_obj->oo_dt, dtrec, key);
419 RETURN(rc > 0 ? 0 : rc);
423 * Returns cookie for current Iterator position.
425 * \param di - osd iterator
/*
 * .store method of the accounting iterator.
 * NOTE(review): the return statement is not visible in this listing;
 * osd_it_acct_load() compares the cookie against it->oiq_id, so the
 * cookie is presumably that id -- confirm against the full source.
 */
427 static __u64 osd_it_acct_store(const struct lu_env *env,
428 const struct dt_it *di)
430 struct osd_it_quota *it = (struct osd_it_quota *)di;
437 * Restore the iterator from a cookie. If \a hash isn't found,
438 * restore the first valid record.
440 * \param di - osd iterator
441 * \param hash - iterator location cookie
443 * \retval +ve - di points to the first valid record
444 * \retval +1 - di points to exact matched hash
445 * \retval -ve - failure
/*
 * .load method of the accounting iterator: reposition the iterator from
 * the cookie \a hash.
 * NOTE(review): the statement executed when the iterator already sits on
 * \a hash is not visible in this listing.
 */
447 static int osd_it_acct_load(const struct lu_env *env,
448 const struct dt_it *di, __u64 hash)
450 struct osd_it_quota *it = (struct osd_it_quota *)di;
455 * LU-8999 - If it is called to resume the iteration, calling
456 * osd_it_acct_get could change the block orders in the lower level
457 * of the quota tree, which are saved in osd_it_quota->oiq_blk.
/* a nonzero current id equal to the cookie is handled without a new search */
459 if (it->oiq_id != 0 && it->oiq_id == hash)
/* otherwise treat the cookie as a key and search for it */
462 RETURN(osd_it_acct_get(env, (struct dt_it *)di,
463 (const struct dt_key *)&hash));
467 * Index and Iterator operations for accounting objects
/*
 * Method table that exposes the accounting lookup and iterator functions
 * defined in this file through struct dt_index_operations.
 * NOTE(review): the line that opens the nested iterator initializer and
 * the closing braces are not visible in this listing.
 */
469 const struct dt_index_operations osd_acct_index_ops = {
470 .dio_lookup = osd_acct_index_lookup,
472 .init = osd_it_acct_init,
473 .fini = osd_it_acct_fini,
474 .get = osd_it_acct_get,
475 .put = osd_it_acct_put,
476 .next = osd_it_acct_next,
477 .key = osd_it_acct_key,
478 .key_size = osd_it_acct_key_size,
479 .rec = osd_it_acct_rec,
480 .store = osd_it_acct_store,
481 .load = osd_it_acct_load
/*
 * Byte-swap a buffer in place, one 64-bit word at a time.
 * \a size is in bytes and must be a multiple of sizeof(__u64) (asserted).
 * NOTE(review): the declaration of offset is not visible in this listing.
 */
485 static inline void osd_quota_swab(char *ptr, size_t size)
489 LASSERT((size & (sizeof(__u64) - 1)) == 0);
491 for (offset = 0; offset < size; offset += sizeof(__u64))
492 __swab64s((__u64 *)(ptr + offset));
/*
 * Copy id_rec_size bytes of \a rec into \a quota_rec, byte-swap the copy
 * and return it. The record size comes from the IAM descriptor of the
 * object's directory container, so obj->oo_dir must be set (asserted).
 * NOTE(review): any preprocessor conditionals around this body and any
 * alternative return path are not visible in this listing.
 */
495 const struct dt_rec *osd_quota_pack(struct osd_object *obj,
496 const struct dt_rec *rec,
497 union lquota_rec *quota_rec)
500 struct iam_descr *descr;
502 LASSERT(obj->oo_dir != NULL);
503 descr = obj->oo_dir->od_container.ic_descr;
/* work on a copy so that the caller's record is left untouched */
505 memcpy(quota_rec, rec, descr->id_rec_size);
507 osd_quota_swab((char *)quota_rec, descr->id_rec_size);
508 return (const struct dt_rec *)quota_rec;
/*
 * Byte-swap the record \a rec in place over id_rec_size bytes, the size
 * taken from the IAM descriptor of the object's directory container.
 * Note that \a rec is declared const but is modified through a cast.
 * NOTE(review): any preprocessor conditionals around this body are not
 * visible in this listing.
 */
514 void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec)
517 struct iam_descr *descr;
519 LASSERT(obj->oo_dir != NULL);
520 descr = obj->oo_dir->od_container.ic_descr;
522 osd_quota_swab((char *)rec, descr->id_rec_size);
/*
 * Return the quota type recorded in slot \a i of the transaction's
 * declared-id table. No bounds check is done on \a i here.
 */
526 static inline int osd_qid_type(struct osd_thandle *oh, int i)
528 return oh->ot_id_types[i];
532 * Reserve journal credits for quota files update first, then call
533 * ->op_begin() to perform quota enforcement.
535 * \param env - the environment passed by the caller
536 * \param oh - osd transaction handle
537 * \param qi - quota id & space required for this operation
538 * \param obj - osd object, could be NULL when it's under create
539 * \param enforce - whether to perform quota enforcement
540 * \param local_flags - if the operation is write, return no user quota, no
541 * group quota, or sync commit flags to the caller
543 * \retval 0 - success
544 * \retval -ve - failure
/*
 * Declare one quota id for transaction \a oh: make sure the
 * (id, resource, type) triple has a slot in the handle's id table,
 * declare journal credits for an id seen for the first time, then call
 * qsd_op_begin() on the matching quota slave instance.
 * NOTE(review): several local declarations, branch bodies and return
 * statements are not visible in this listing.
 */
546 int osd_declare_qid(const struct lu_env *env, struct osd_thandle *oh,
547 struct lquota_id_info *qi, struct osd_object *obj,
548 bool enforce, enum osd_quota_local_flags *local_flags)
550 struct osd_device *dev;
551 struct qsd_instance *qsd;
552 struct lu_fid fid = { 0 };
553 struct inode *inode = NULL;
554 unsigned long long ino = 0;
/* block accounting uses the data resource, inode accounting the metadata one */
556 __u8 res = qi->lqi_is_blk ? LQUOTA_RES_DT : LQUOTA_RES_MD;
/* fid and ino are gathered for the debug message below */
561 fid = *lu_object_fid(&obj->oo_dt.do_lu);
562 inode = obj->oo_inode;
563 ino = inode ? inode->i_ino : 0;
565 CDEBUG(D_QUOTA, "fid="DFID" ino=%llu type=%u, id=%llu\n",
566 PFID(&fid), ino, qi->lqi_type, qi->lqi_id.qid_uid);
569 LASSERTF(oh->ot_id_cnt <= OSD_MAX_UGID_CNT, "count=%d\n",
572 dev = osd_dt_dev(oh->ot_super.th_dev);
573 LASSERT(dev != NULL);
/* data and metadata resources have separate quota slave instances */
575 if (res == LQUOTA_RES_DT)
576 qsd = dev->od_quota_slave_dt;
578 qsd = dev->od_quota_slave_md;
/* look for a slot that already holds the same id, resource and type */
580 for (i = 0; i < oh->ot_id_cnt; i++) {
581 if (oh->ot_id_array[i] == qi->lqi_id.qid_uid &&
582 oh->ot_id_res[i] == res &&
583 oh->ot_id_types[i] == qi->lqi_type) {
590 /* we need to account for credits for this new ID */
591 if (i >= OSD_MAX_UGID_CNT) {
593 CERROR("%s: too many qids %u > %u on "DFID": rc = %d\n",
594 osd_name(dev), i + 1, OSD_MAX_UGID_CNT,
599 if (qi->lqi_id.qid_uid == 0) {
600 /* root ID should be always present in the quota file */
603 /* can't rely on the current state as it can change
605 * if used space for this ID could be dropped to zero,
606 * reserve extra credits for removing ID entry from
/* a negative space delta selects the delete-credit count */
609 if (qi->lqi_space < 0)
610 crd = LDISKFS_QUOTA_DEL_BLOCKS(osd_sb(dev));
612 crd = LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev));
615 osd_trans_declare_op(env, oh, OSD_OT_QUOTA, crd);
/* remember the new id in slot i of the handle's table */
617 oh->ot_id_array[i] = qi->lqi_id.qid_uid;
618 oh->ot_id_types[i] = qi->lqi_type;
619 oh->ot_id_res[i] = res;
623 if (unlikely(qsd == NULL))
624 /* quota slave instance hasn't been allocated yet */
629 rc = qsd_op_begin(env, qsd, oh->ot_quota_trans, qi,
635 * Wrapper for osd_declare_qid()
637 * \param env - the environment passed by the caller
638 * \param uid - user id of the inode
639 * \param gid - group id of the inode
640 * \param space - how many blocks/inodes will be consumed/released
641 * \param oh - osd transaction handle
642 * \param obj - osd object, could be NULL when it's under create
643 * \param local_flags - if the operation is write, return no user quota, no
644 * group quota, or sync commit flags to the caller
645 * \param osd_qid_declare_flags - indicate this is an inode/block accounting
646 * and whether changes are performed by root user
648 * \retval 0 - success
649 * \retval -ve - failure
/*
 * Declare the quota change \a space for the user id, the group id and,
 * when built with HAVE_PROJECT_QUOTA, the project id, by calling
 * osd_declare_qid() once per quota type with the per-thread
 * lquota_id_info.
 * Returns the first nonzero result in the order user, group, project.
 * NOTE(review): the second half of the force expression and the bodies
 * of several if statements are not visible in this listing.
 */
651 int osd_declare_inode_qid(const struct lu_env *env, qid_t uid, qid_t gid,
652 __u32 projid, long long space, struct osd_thandle *oh,
653 struct osd_object *obj,
654 enum osd_quota_local_flags *local_flags,
655 enum osd_qid_declare_flags osd_qid_declare_flags)
657 struct osd_thread_info *info = osd_oti_get(env);
658 struct lquota_id_info *qi = &info->oti_qi;
659 int rcu, rcg, rcp = 0; /* user & group & project rc */
660 struct thandle *th = &oh->ot_super;
661 bool force = !!(osd_qid_declare_flags & OSD_QID_FORCE) ||
665 /* very fast path for special files like llog */
666 if (uid == 0 && gid == 0 && projid == 0)
669 /* let's start with user quota */
670 qi->lqi_id.qid_uid = uid;
671 qi->lqi_type = USRQUOTA;
672 qi->lqi_space = space;
/* OSD_QID_BLK selects block accounting instead of inode accounting */
673 qi->lqi_is_blk = !!(osd_qid_declare_flags & OSD_QID_BLK);
674 rcu = osd_declare_qid(env, oh, qi, obj, true, local_flags);
676 if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
677 /* ignore EDQUOT & EINPROGRESS when changes are done by root */
681 * For non-fatal error, we want to continue to get the noquota flags
682 * for group id. This is only for commit write, which has @flags passed
683 * in. See osd_declare_write_commit().
684 * When force is set to true, we also want to proceed with the gid
686 if (rcu && (rcu != -EDQUOT || local_flags == NULL))
689 /* and now group quota */
/* lqi_space and lqi_is_blk set above are reused for this call */
690 qi->lqi_id.qid_gid = gid;
691 qi->lqi_type = GRPQUOTA;
692 rcg = osd_declare_qid(env, oh, qi, obj, true, local_flags);
694 if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
695 /* as before, ignore EDQUOT & EINPROGRESS for root */
698 #ifdef HAVE_PROJECT_QUOTA
699 if (rcg && (rcg != -EDQUOT || local_flags == NULL))
702 /* and now project quota */
703 qi->lqi_id.qid_projid = projid;
704 qi->lqi_type = PRJQUOTA;
705 rcp = osd_declare_qid(env, oh, qi, obj, true, local_flags);
/* with QUOTA_FL_ROOT_PRJQUOTA set, force follows th_ignore_quota instead */
707 if (local_flags && *local_flags & QUOTA_FL_ROOT_PRJQUOTA)
708 force = th->th_ignore_quota;
709 if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS)) {
710 CDEBUG(D_QUOTA, "forced to ignore quota flags = %#x\n",
711 local_flags ? *local_flags : -1);
712 /* as before, ignore EDQUOT & EINPROGRESS for root */
717 RETURN(rcu ? rcu : (rcg ? rcg : rcp));