4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2015, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann@whamcloud.com>
28 * Author: Niu Yawei <niu@whamcloud.com>
31 #include <lustre_quota.h>
32 #include "osd_internal.h"
35 * Helpers function to find out the quota type (USRQUOTA/GRPQUOTA) of a
38 static inline int fid2type(const struct lu_fid *fid)
40 LASSERT(fid_is_acct(fid));
41 switch (fid_oid(fid)) {
48 LASSERTF(0, "invalid fid for quota type: %u", fid_oid(fid));
/*
 * Space Accounting Management
 */
57 * Look up an accounting object based on its fid.
59 * \param info - is the osd thread info passed by the caller
60 * \param osd - is the osd device
61 * \param fid - is the fid of the accounting object we want to look up
62 * \param id - is the osd_inode_id struct to fill with the inode number of
63 * the quota file if the lookup is successful
65 int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd,
66 const struct lu_fid *fid, struct osd_inode_id *id)
68 struct super_block *sb = osd_sb(osd);
69 unsigned long qf_inums[LL_MAXQUOTAS] = {
70 le32_to_cpu(LDISKFS_SB(sb)->s_es->s_usr_quota_inum),
71 le32_to_cpu(LDISKFS_SB(sb)->s_es->s_grp_quota_inum)
75 LASSERT(fid_is_acct(fid));
77 if (!LDISKFS_HAS_RO_COMPAT_FEATURE(sb,
78 LDISKFS_FEATURE_RO_COMPAT_QUOTA))
81 id->oii_gen = OSD_OII_NOGEN;
82 id->oii_ino = qf_inums[fid2type(fid)];
83 if (!ldiskfs_valid_inum(sb, id->oii_ino))
89 * Return space usage (#blocks & #inodes) consumed by a given uid or gid.
91 * \param env - is the environment passed by the caller
92 * \param dtobj - is the accounting object
93 * \param dtrec - is the record to fill with space usage information
94 * \param dtkey - is the id of the user or group for which we would
95 * like to access disk usage.
97 * \retval +ve - success : exact match
98 * \retval -ve - failure
100 static int osd_acct_index_lookup(const struct lu_env *env,
101 struct dt_object *dtobj,
102 struct dt_rec *dtrec,
103 const struct dt_key *dtkey)
105 struct osd_thread_info *info = osd_oti_get(env);
106 #if defined(HAVE_DQUOT_QC_DQBLK)
107 struct qc_dqblk *dqblk = &info->oti_qdq;
108 #elif defined(HAVE_DQUOT_FS_DISK_QUOTA)
109 struct fs_disk_quota *dqblk = &info->oti_fdq;
111 struct if_dqblk *dqblk = &info->oti_dqblk;
113 struct super_block *sb = osd_sb(osd_obj2dev(osd_dt_obj(dtobj)));
114 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
115 __u64 id = *((__u64 *)dtkey);
117 #ifdef HAVE_DQUOT_KQID
124 type = fid2type(lu_object_fid(&dtobj->do_lu));
125 memset(dqblk, 0, sizeof(*dqblk));
126 #ifdef HAVE_DQUOT_KQID
127 qid = make_kqid(&init_user_ns, type, id);
128 rc = sb->s_qcop->get_dqblk(sb, qid, dqblk);
130 rc = sb->s_qcop->get_dqblk(sb, type, (qid_t) id, dqblk);
134 #if defined(HAVE_DQUOT_QC_DQBLK)
135 rec->bspace = dqblk->d_space;
136 rec->ispace = dqblk->d_ino_count;
137 #elif defined(HAVE_DQUOT_FS_DISK_QUOTA)
138 rec->bspace = dqblk->d_bcount;
139 rec->ispace = dqblk->d_icount;
141 rec->bspace = dqblk->dqb_curspace;
142 rec->ispace = dqblk->dqb_curinodes;
/*
 * Log a failure to read quota information for the object under iterator
 * \a it; \a rc is the error code that is reported.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon is exactly one statement (safe in an unbraced if/else), and the
 * arguments are parenthesized so that any expression can be passed.
 */
#define QUOTA_IT_READ_ERROR(it, rc)					\
do {									\
	CERROR("%s: Error while trying to read quota information, "	\
	       "failed with %d\n",					\
	       osd_dev((it)->oiq_obj->oo_dt.do_lu.lo_dev)->od_svname,	\
	       (rc));							\
} while (0)
153 * Initialize osd Iterator for given osd index object.
155 * \param dt - osd index object
156 * \param attr - not used
158 static struct dt_it *osd_it_acct_init(const struct lu_env *env,
159 struct dt_object *dt,
162 struct osd_it_quota *it;
163 struct lu_object *lo = &dt->do_lu;
164 struct osd_object *obj = osd_dt_obj(dt);
168 LASSERT(lu_object_exists(lo));
172 RETURN(ERR_PTR(-ENOMEM));
176 INIT_LIST_HEAD(&it->oiq_list);
178 /* LUSTRE_DQTREEOFF is the initial offset where the tree can be found */
179 it->oiq_blk[0] = LUSTRE_DQTREEOFF;
181 /* NB: we don't need to store the tree depth since it is always
182 * equal to LUSTRE_DQTREEDEPTH - 1 (root has depth = 0) for a leaf
184 RETURN((struct dt_it *)it);
188 * Free given iterator.
190 * \param di - osd iterator
192 static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
194 struct osd_it_quota *it = (struct osd_it_quota *)di;
195 struct osd_quota_leaf *leaf, *tmp;
198 osd_object_put(env, it->oiq_obj);
200 list_for_each_entry_safe(leaf, tmp, &it->oiq_list, oql_link) {
201 list_del_init(&leaf->oql_link);
211 * Move Iterator to record specified by \a key, if the \a key isn't found,
212 * move to the first valid record.
214 * \param di - osd iterator
215 * \param key - uid or gid
217 * \retval +ve - di points to the first valid record
218 * \retval +1 - di points to exact matched key
219 * \retval -ve - failure
221 static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
222 const struct dt_key *key)
224 struct osd_it_quota *it = (struct osd_it_quota *)di;
225 const struct lu_fid *fid =
226 lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
228 qid_t dqid = *(qid_t *)key;
233 type = fid2type(fid);
235 offset = find_tree_dqentry(env, it->oiq_obj, type, dqid,
236 LUSTRE_DQTREEOFF, 0, it);
237 if (offset > 0) { /* Found */
239 } else if (offset < 0) { /* Error */
240 QUOTA_IT_READ_ERROR(it, (int)offset);
244 /* The @key is not found, move to the first valid entry */
245 rc = walk_tree_dqentry(env, it->oiq_obj, type, it->oiq_blk[0], 0,
/**
 * Release Iterator
 *
 * Nothing is held between osd_it_acct_get() and this call, so there is
 * nothing to do.
 *
 * \param env - is the environment passed by the caller (not used)
 * \param di  - osd iterator (not used)
 */
static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
{
	/* intentionally empty */
}
265 static int osd_it_add_processed(struct osd_it_quota *it, int depth)
267 struct osd_quota_leaf *leaf;
272 INIT_LIST_HEAD(&leaf->oql_link);
273 leaf->oql_blk = it->oiq_blk[depth];
274 list_add_tail(&leaf->oql_link, &it->oiq_list);
279 * Move on to the next valid entry.
281 * \param di - osd iterator
283 * \retval +ve - iterator reached the end
284 * \retval 0 - iterator has not reached the end yet
285 * \retval -ve - unexpected failure
287 static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
289 struct osd_it_quota *it = (struct osd_it_quota *)di;
290 const struct lu_fid *fid =
291 lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
298 type = fid2type(fid);
300 /* Let's first check if there are any remaining valid entry in the
301 * current leaf block. Start with the next entry after the current one.
303 depth = LUSTRE_DQTREEDEPTH;
304 index = it->oiq_index[depth];
305 if (++index < LUSTRE_DQSTRINBLK) {
306 /* Search for the next valid entry from current index */
307 rc = walk_block_dqentry(env, it->oiq_obj, type,
308 it->oiq_blk[depth], index, it);
310 QUOTA_IT_READ_ERROR(it, rc);
312 } else if (rc == 0) {
313 /* Found on entry, @it is already updated to the
314 * new position in walk_block_dqentry(). */
317 rc = osd_it_add_processed(it, depth);
322 rc = osd_it_add_processed(it, depth);
328 /* We have consumed all the entries of the current leaf block, move on
329 * to the next one. */
332 /* We keep searching as long as walk_tree_dqentry() returns +1
333 * (= no valid entry found). */
334 for (; depth >= 0 && rc > 0; depth--) {
335 index = it->oiq_index[depth];
338 rc = walk_tree_dqentry(env, it->oiq_obj, type,
339 it->oiq_blk[depth], depth, index, it);
343 QUOTA_IT_READ_ERROR(it, rc);
348 * Return pointer to the key under iterator.
350 * \param di - osd iterator
352 static struct dt_key *osd_it_acct_key(const struct lu_env *env,
353 const struct dt_it *di)
355 struct osd_it_quota *it = (struct osd_it_quota *)di;
358 RETURN((struct dt_key *)&it->oiq_id);
362 * Return size of key under iterator (in bytes)
364 * \param di - osd iterator
366 static int osd_it_acct_key_size(const struct lu_env *env,
367 const struct dt_it *di)
369 struct osd_it_quota *it = (struct osd_it_quota *)di;
372 RETURN((int)sizeof(it->oiq_id));
376 * Return pointer to the record under iterator.
378 * \param di - osd iterator
379 * \param attr - not used
381 static int osd_it_acct_rec(const struct lu_env *env,
382 const struct dt_it *di,
383 struct dt_rec *dtrec, __u32 attr)
385 struct osd_it_quota *it = (struct osd_it_quota *)di;
386 const struct dt_key *key = osd_it_acct_key(env, di);
391 rc = osd_acct_index_lookup(env, &it->oiq_obj->oo_dt, dtrec, key);
392 RETURN(rc > 0 ? 0 : rc);
396 * Returns cookie for current Iterator position.
398 * \param di - osd iterator
400 static __u64 osd_it_acct_store(const struct lu_env *env,
401 const struct dt_it *di)
403 struct osd_it_quota *it = (struct osd_it_quota *)di;
410 * Restore iterator from cookie. if the \a hash isn't found,
411 * restore the first valid record.
413 * \param di - osd iterator
414 * \param hash - iterator location cookie
416 * \retval +ve - di points to the first valid record
417 * \retval +1 - di points to exact matched hash
418 * \retval -ve - failure
420 static int osd_it_acct_load(const struct lu_env *env,
421 const struct dt_it *di, __u64 hash)
424 RETURN(osd_it_acct_get(env, (struct dt_it *)di,
425 (const struct dt_key *)&hash));
429 * Index and Iterator operations for accounting objects
431 const struct dt_index_operations osd_acct_index_ops = {
432 .dio_lookup = osd_acct_index_lookup,
434 .init = osd_it_acct_init,
435 .fini = osd_it_acct_fini,
436 .get = osd_it_acct_get,
437 .put = osd_it_acct_put,
438 .next = osd_it_acct_next,
439 .key = osd_it_acct_key,
440 .key_size = osd_it_acct_key_size,
441 .rec = osd_it_acct_rec,
442 .store = osd_it_acct_store,
443 .load = osd_it_acct_load
447 static inline void osd_quota_swab(char *ptr, size_t size)
451 LASSERT((size & (sizeof(__u64) - 1)) == 0);
453 for (offset = 0; offset < size; offset += sizeof(__u64))
454 __swab64s((__u64 *)(ptr + offset));
/*
 * Prepare a quota record for being written to the index of \a obj.
 *
 * On big-endian hosts the record is copied into \a quota_rec and every
 * 64-bit word of the copy is byte-swapped; on other hosts \a rec is
 * returned untouched and \a quota_rec is not used.
 *
 * \param obj       - osd object holding the index; obj->oo_dir must be set
 *                    on big-endian hosts
 * \param rec       - record supplied by the caller
 * \param quota_rec - scratch buffer receiving the swapped copy
 *
 * \retval pointer to the record to store (\a quota_rec or \a rec)
 */
const struct dt_rec *osd_quota_pack(struct osd_object *obj,
				    const struct dt_rec *rec,
				    union lquota_rec *quota_rec)
{
#ifdef __BIG_ENDIAN
	struct iam_descr *descr;

	LASSERT(obj->oo_dir != NULL);
	descr = obj->oo_dir->od_container.ic_descr;

	/* swap a private copy, the caller's record stays as it is */
	memcpy(quota_rec, rec, descr->id_rec_size);
	osd_quota_swab((char *)quota_rec, descr->id_rec_size);

	return (const struct dt_rec *)quota_rec;
#else
	return rec;
#endif
}
/*
 * Convert, in place, a quota record read from the index of \a obj.
 *
 * On big-endian hosts every 64-bit word of \a rec is byte-swapped (the
 * buffer is written to despite the const qualifier); on other hosts this
 * is a no-op.
 *
 * \param obj - osd object holding the index; obj->oo_dir must be set on
 *              big-endian hosts
 * \param rec - record to convert
 */
void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec)
{
#ifdef __BIG_ENDIAN
	struct iam_descr *descr;

	LASSERT(obj->oo_dir != NULL);
	descr = obj->oo_dir->od_container.ic_descr;

	osd_quota_swab((char *)rec, descr->id_rec_size);
#else
	/* nothing to convert */
	return;
#endif
}
490 static inline int osd_qid_type(struct osd_thandle *oh, int i)
492 return oh->ot_id_types[i];
496 * Reserve journal credits for quota files update first, then call
497 * ->op_begin() to perform quota enforcement.
499 * \param env - the environment passed by the caller
500 * \param oh - osd transaction handle
501 * \param qi - quota id & space required for this operation
502 * \param obj - osd object, could be NULL when it's under create
503 * \param enforce - whether to perform quota enforcement
504 * \param flags - if the operation is write, return no user quota, no
505 * group quota, or sync commit flags to the caller
507 * \retval 0 - success
508 * \retval -ve - failure
510 int osd_declare_qid(const struct lu_env *env, struct osd_thandle *oh,
511 struct lquota_id_info *qi, struct osd_object *obj,
512 bool enforce, int *flags)
514 struct osd_device *dev;
515 struct qsd_instance *qsd;
516 struct inode *inode = NULL;
522 LASSERTF(oh->ot_id_cnt <= OSD_MAX_UGID_CNT, "count=%d\n",
525 dev = osd_dt_dev(oh->ot_super.th_dev);
526 LASSERT(dev != NULL);
528 qsd = dev->od_quota_slave;
530 for (i = 0; i < oh->ot_id_cnt; i++) {
531 if (oh->ot_id_array[i] == qi->lqi_id.qid_uid &&
532 oh->ot_id_types[i] == qi->lqi_type) {
539 /* we need to account for credits for this new ID */
540 if (i >= OSD_MAX_UGID_CNT) {
541 CERROR("Too many(%d) trans qids!\n", i + 1);
546 inode = obj->oo_inode;
548 /* root ID entry should be always present in the quota file */
549 if (qi->lqi_id.qid_uid == 0) {
552 /* used space for this ID could be dropped to zero,
553 * reserve extra credits for removing ID entry from
555 if (qi->lqi_space < 0)
556 crd = LDISKFS_QUOTA_DEL_BLOCKS(osd_sb(dev));
557 /* reserve credits for adding ID entry to the quota
558 * file if the i_dquot isn't initialized yet. */
559 else if (inode == NULL ||
560 #ifdef HAVE_EXT4_INFO_DQUOT
561 LDISKFS_I(inode)->i_dquot[qi->lqi_type] == NULL)
563 inode->i_dquot[qi->lqi_type] == NULL)
565 crd = LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev));
570 osd_trans_declare_op(env, oh, OSD_OT_QUOTA, crd);
572 oh->ot_id_array[i] = qi->lqi_id.qid_uid;
573 oh->ot_id_types[i] = qi->lqi_type;
577 if (unlikely(qsd == NULL))
578 /* quota slave instance hasn't been allocated yet */
583 rc = qsd_op_begin(env, qsd, oh->ot_quota_trans, qi, flags);
588 * Wrapper for osd_declare_qid()
590 * \param env - the environment passed by the caller
591 * \param uid - user id of the inode
592 * \param gid - group id of the inode
593 * \param space - how many blocks/inodes will be consumed/released
594 * \param oh - osd transaction handle
595 * \param obj - osd object, could be NULL when it's under create
596 * \param is_blk - block quota or inode quota?
597 * \param flags - if the operation is write, return no user quota, no
598 * group quota, or sync commit flags to the caller
599 * \param force - set to 1 when changes are performed by root user and thus
600 * can't failed with EDQUOT
602 * \retval 0 - success
603 * \retval -ve - failure
605 int osd_declare_inode_qid(const struct lu_env *env, qid_t uid, qid_t gid,
606 long long space, struct osd_thandle *oh,
607 struct osd_object *obj, bool is_blk, int *flags,
610 struct osd_thread_info *info = osd_oti_get(env);
611 struct lquota_id_info *qi = &info->oti_qi;
612 int rcu, rcg; /* user & group rc */
615 /* let's start with user quota */
616 qi->lqi_id.qid_uid = uid;
617 qi->lqi_type = USRQUOTA;
618 qi->lqi_space = space;
619 qi->lqi_is_blk = is_blk;
620 rcu = osd_declare_qid(env, oh, qi, obj, true, flags);
622 if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
623 /* ignore EDQUOT & EINPROGRESS when changes are done by root */
626 /* For non-fatal error, we want to continue to get the noquota flags
627 * for group id. This is only for commit write, which has @flags passed
628 * in. See osd_declare_write_commit().
629 * When force is set to true, we also want to proceed with the gid */
630 if (rcu && (rcu != -EDQUOT || flags == NULL))
633 /* and now group quota */
634 qi->lqi_id.qid_gid = gid;
635 qi->lqi_type = GRPQUOTA;
636 rcg = osd_declare_qid(env, oh, qi, obj, true, flags);
638 if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
639 /* as before, ignore EDQUOT & EINPROGRESS for root */
642 RETURN(rcu ? rcu : rcg);
645 int osd_quota_migration(const struct lu_env *env, struct dt_object *dt)
647 struct osd_thread_info *oti = osd_oti_get(env);
648 struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
649 struct dt_object *root, *parent = NULL, *admin = NULL;
650 dt_obj_version_t version;
651 char *fname, *fnames[] = {ADMIN_USR, ADMIN_GRP};
655 /* not newly created global index */
656 version = dt_version_get(env, dt);
661 rc = dt_root_get(env, &osd->od_dt_dev, &oti->oti_fid);
663 CERROR("%s: Can't get root FID, rc:%d\n", osd->od_svname, rc);
667 root = dt_locate(env, &osd->od_dt_dev, &oti->oti_fid);
669 CERROR("%s: Failed to locate root "DFID", rc:%ld\n",
670 osd->od_svname, PFID(&oti->oti_fid), PTR_ERR(root));
671 RETURN(PTR_ERR(root));
674 /* locate /OBJECTS */
675 rc = dt_lookup_dir(env, root, OBJECTS, &oti->oti_fid);
679 CERROR("%s: Failed to lookup %s, rc:%d\n",
680 osd->od_svname, OBJECTS, rc);
684 parent = dt_locate(env, &osd->od_dt_dev, &oti->oti_fid);
685 if (IS_ERR(parent)) {
686 CERROR("%s: Failed to locate %s "DFID", rc:%ld\n",
687 osd->od_svname, OBJECTS, PFID(&oti->oti_fid),
689 GOTO(out, rc = PTR_ERR(parent));
692 /* locate quota admin files */
693 for (i = 0; i < 2; i++) {
695 rc = dt_lookup_dir(env, parent, fname, &oti->oti_fid);
700 CERROR("%s: Failed to lookup %s, rc:%d\n",
701 osd->od_svname, fname, rc);
705 admin = dt_locate(env, &osd->od_dt_dev, &oti->oti_fid);
707 CERROR("%s: Failed to locate %s "DFID", rc:%d\n",
708 osd->od_svname, fname, PFID(&oti->oti_fid), rc);
709 GOTO(out, rc = PTR_ERR(admin));
712 if (!dt_object_exists(admin)) {
713 CERROR("%s: Old admin file %s doesn't exist, but is "
714 "still referenced in parent directory.\n",
715 osd->od_svname, fname);
716 dt_object_put(env, admin);
717 GOTO(out, rc = -ENOENT);
720 LCONSOLE_WARN("%s: Detected old quota admin file(%s)! If you "
721 "want to keep the old quota limits settings, "
722 "please upgrade to lower version(2.5) first to "
723 "convert them into new format.\n",
724 osd->od_svname, fname);
726 dt_object_put(env, admin);
727 GOTO(out, rc = -EINVAL);
730 if (parent && !IS_ERR(parent))
731 dt_object_put(env, parent);
732 dt_object_put(env, root);