4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann@whamcloud.com>
30 #include <dt_object.h>
31 #include <lustre_quota.h>
33 #include "osd_internal.h"
36 * Helper function to estimate the number of inodes in use for the given
37 * uid/gid/projid from the block usage
/*
 * Estimate how many objects an id owns from the number of bytes it uses.
 *
 * The objset-wide byte and object counters are read with
 * dmu_objset_space() and passed to osd_objs_count_estimate() together
 * with the id's usage shifted right by bshift.
 *
 * \param osd      - device whose objset (od_os) and od_max_blksz are used
 * \param uidbytes - bytes accounted to the id
 *
 * NOTE(review): this excerpt has lost lines (braces, whatever condition
 * guards the max_t() clamp, and the return statement are not visible);
 * restore them from the pristine file before building.
 */
39 static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes)
41 uint64_t refdbytes, availbytes, usedobjs, availobjs;
42 uint64_t uidobjs, bshift;
44 /* get fresh statfs info */
45 dmu_objset_space(osd->od_os, &refdbytes, &availbytes,
46 &usedobjs, &availobjs);
48 /* estimate the number of objects based on the disk usage */
/* fls64() - 1 is the index of the highest set bit of od_max_blksz */
49 bshift = fls64(osd->od_max_blksz) - 1;
50 uidobjs = osd_objs_count_estimate(refdbytes, usedobjs,
51 uidbytes >> bshift, bshift);
53 /* if we have at least 1 byte, we have at least one dnode ... */
54 uidobjs = max_t(uint64_t, uidobjs, 1);
60 * Space Accounting Management
64 * Return space usage consumed by a given uid or gid or projid.
65 * Block usage is accurate since it is maintained by DMU itself.
66 * However, DMU does not provide inode accounting, so the #inodes in use
67 * is estimated from the block usage and statfs information.
69 * \param env - is the environment passed by the caller
70 * \param dtobj - is the accounting object
71 * \param dtrec - is the record to fill with space usage information
72 * \param dtkey - is the id of the user, group or project for which we
73 * would like to access disk usage.
75 * \retval +ve - success : exact match
76 * \retval -ve - failure
/*
 * Lookup handler for accounting objects (installed as .dio_lookup in
 * osd_acct_index_ops).
 *
 * The visible code zeroes both counters of the lquota_acct_rec behind
 * dtrec, formats the 64-bit id behind dtkey as a hexadecimal string in
 * the per-thread oti_buf, and looks that name up in the accounting
 * object's dnode with osd_zap_lookup().  When
 * osd_dmu_userobj_accounting_available() reports false and bspace is
 * non-zero, ispace is estimated with osd_objset_user_iused(); the code
 * that follows reads ispace from the DMU_OBJACCT_PREFIX-prefixed entry
 * of the same object (presumably the else branch -- the line separating
 * the two is not visible).
 *
 * NOTE(review): several lines are missing from this excerpt (the dtrec
 * parameter, the rc declaration, the destination of the first lookup,
 * the error checks and all return paths), so the return values cannot
 * be confirmed from here.
 */
78 static int osd_acct_index_lookup(const struct lu_env *env,
79 struct dt_object *dtobj,
81 const struct dt_key *dtkey)
83 struct osd_thread_info *info = osd_oti_get(env);
84 char *buf = info->oti_buf;
85 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
86 struct osd_object *obj = osd_dt_obj(dtobj);
87 struct osd_device *osd = osd_obj2dev(obj);
88 dnode_t *dn = obj->oo_dn;
89 size_t buflen = sizeof(info->oti_buf);
93 rec->bspace = rec->ispace = 0;
95 /* convert the 64-bit uid/gid/projid into a string */
96 snprintf(buf, buflen, "%llx", *((__u64 *)dtkey));
98 CDEBUG(D_QUOTA, "%s: miss accounting obj for %s\n",
104 /* disk usage (in bytes) is maintained by DMU.
105 * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
106 * not associated with any dmu_but_t (see dnode_special_open()).
108 rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1,
111 /* user/group/project has not created anything yet */
112 CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n",
113 osd->od_svname, buf);
114 /* -ENOENT is normal case, convert it as 1. */
120 if (!osd_dmu_userobj_accounting_available(osd)) {
121 if (rec->bspace != 0)
122 /* estimate #inodes in use */
123 rec->ispace = osd_objset_user_iused(osd, rec->bspace);
/* rebuild the key in buf: DMU_OBJACCT_PREFIX followed by a hex value */
126 snprintf(buf, buflen, DMU_OBJACCT_PREFIX "%llx",
128 rc = osd_zap_lookup(osd, dn->dn_object, dn, buf,
129 sizeof(uint64_t), 1, &rec->ispace);
132 "%s: id %s not found dnode accounting\n",
133 osd->od_svname, buf);
134 /* -ENOENT is normal case, convert it as 1. */
136 } else if (rc == 0) {
145 * Initialize osd Iterator for given osd index object.
147 * \param dt - osd index object
148 * \param attr - not used
/*
 * Set up an iterator over an accounting object.
 *
 * The visible code clears the osd_it_quota, records the dnode's object
 * number in oiq_oid, initializes a ZAP cursor on that object with 0 as
 * the last argument (osd_it_acct_load() passes its hash cookie in that
 * position), takes a reference on dt with lu_object_get() and stores
 * the osd_object in oiq_obj.  The iterator is returned cast to
 * struct dt_it *; the visible error returns are ERR_PTR(-ENOENT) and
 * ERR_PTR(-ENOMEM).
 *
 * NOTE(review): the attr parameter, the rc declaration, the code that
 * obtains `it`, and the conditions guarding each error return are
 * missing from this excerpt.
 */
150 static struct dt_it *osd_it_acct_init(const struct lu_env *env,
151 struct dt_object *dt,
154 struct osd_thread_info *info = osd_oti_get(env);
155 struct osd_it_quota *it;
156 struct osd_object *obj = osd_dt_obj(dt);
157 struct osd_device *osd = osd_obj2dev(obj);
158 dnode_t *dn = obj->oo_dn;
163 CDEBUG(D_QUOTA, "%s: Not found in DMU accounting ZAP\n",
166 RETURN(ERR_PTR(-ENOENT));
170 RETURN(ERR_PTR(-ENOMEM));
174 RETURN(ERR_PTR(-ENOMEM));
176 memset(it, 0, sizeof(*it));
177 it->oiq_oid = dn->dn_object;
179 /* initialize zap cursor */
180 rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0);
186 /* take object reference */
187 lu_object_get(&dt->do_lu);
188 it->oiq_obj = osd_dt_obj(dt);
191 RETURN((struct dt_it *)it);
195 * Free given iterator.
197 * \param di - osd iterator
/*
 * Tear down an iterator: finalizes its ZAP cursor and puts the object
 * stored in oiq_obj (osd_it_acct_init() takes a reference with
 * lu_object_get() before storing it).
 *
 * NOTE(review): lines are missing from this excerpt; whether and how
 * the iterator memory itself is released is not visible.
 */
199 static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
201 struct osd_it_quota *it = (struct osd_it_quota *)di;
204 osd_zap_cursor_fini(it->oiq_zc);
205 osd_object_put(env, it->oiq_obj);
212 * Locate the first entry that is for space accounting.
/*
 * Position the iterator's cursor on an acceptable entry.
 *
 * Each entry is fetched into za with zap_cursor_retrieve() (its return
 * value is negated into rc) and its name is compared against
 * DMU_OBJACCT_PREFIX over DMU_OBJACCT_PREFIX_LEN bytes;
 * zap_cursor_advance() moves on to the following entry.
 *
 * NOTE(review): the loop construct and the statements taken on each
 * outcome are missing from this excerpt.  Presumably entries whose name
 * carries the prefix are skipped and the first one without it ends the
 * scan (strncmp() is non-zero for those) -- confirm against the
 * pristine file.
 */
214 static int osd_zap_locate(struct osd_it_quota *it, zap_attribute_t *za)
220 rc = -zap_cursor_retrieve(it->oiq_zc, za);
224 if (strncmp(za->za_name, DMU_OBJACCT_PREFIX,
225 DMU_OBJACCT_PREFIX_LEN))
228 zap_cursor_advance(it->oiq_zc);
235 * Move on to the next valid entry.
237 * \param di - osd iterator
239 * \retval +ve - iterator reached the end
240 * \retval 0 - iterator has not reached the end yet
241 * \retval -ve - unexpected failure
/*
 * Step the iterator forward.
 *
 * The cursor is advanced once when oiq_reset is 0, then
 * osd_zap_locate() settles on the next acceptable entry.  -ENOENT from
 * the locate step is reported as 1; any other value is returned as is.
 *
 * NOTE(review): lines are missing here (the rc declaration and whatever
 * sits between the advance and the locate call, e.g. any update of
 * oiq_reset).
 */
243 static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
245 struct osd_it_quota *it = (struct osd_it_quota *)di;
246 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
250 if (it->oiq_reset == 0)
251 zap_cursor_advance(it->oiq_zc);
254 rc = osd_zap_locate(it, za);
255 RETURN(rc == -ENOENT ? 1 : rc);
259 * Return pointer to the key under iterator.
261 * \param di - osd iterator
/*
 * Return the key of the entry under the iterator.
 *
 * After osd_zap_locate(), the entry name is parsed as a base-16 number
 * with kstrtoull() into it->oiq_id, and a pointer to that field is
 * returned as the key.  A CERROR message reports a name that could not
 * be parsed.
 *
 * NOTE(review): the checks on rc after both calls and the values
 * returned on failure are missing from this excerpt.
 */
263 static struct dt_key *osd_it_acct_key(const struct lu_env *env,
264 const struct dt_it *di)
266 struct osd_it_quota *it = (struct osd_it_quota *)di;
267 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
272 rc = osd_zap_locate(it, za);
276 rc = kstrtoull(za->za_name, 16, &it->oiq_id);
278 CERROR("couldn't parse name %s: rc = %d\n", za->za_name, rc);
280 RETURN((struct dt_key *) &it->oiq_id);
284 * Return size of key under iterator (in bytes)
286 * \param di - osd iterator
/*
 * Keys are always one uint64_t (see oiq_id in osd_it_acct_key()), so the
 * size is constant and neither argument is consulted in the visible
 * code.
 */
288 static int osd_it_acct_key_size(const struct lu_env *env,
289 const struct dt_it *di)
292 RETURN((int)sizeof(uint64_t));
296 * zap_cursor_retrieve read from current record.
297 * to read bytes we need to call zap_lookup explicitly.
/*
 * Read the value of the entry under the iterator's cursor into buf.
 *
 * zap_cursor_retrieve() supplies the entry name and its integer
 * geometry (za_integer_length, za_num_integers); the value itself is
 * then fetched by name with osd_zap_lookup().  If the value is larger
 * than buf_size bytes the request is capped; buf_size is reused as the
 * number of integers passed to the lookup.  *bytes_read receives
 * actual_size, the (possibly capped) byte count.
 *
 * NOTE(review): the lookup target is chosen between
 * DMU_GROUPUSED_OBJECT and DMU_USERUSED_OBJECT only, depending on
 * fid_oid(fid) == ACCT_GROUP_OID.  Any other accounting object (e.g. a
 * project one) would therefore be read from DMU_USERUSED_OBJECT --
 * confirm whether it->oiq_oid, the object the cursor was opened on,
 * should be used instead.
 *
 * NOTE(review): lines are missing from this excerpt (the bytes_read
 * parameter, the rc/actual_size declarations, the else line between the
 * two buf_size assignments, and every return statement).
 */
299 static int osd_zap_cursor_retrieve_value(const struct lu_env *env,
300 struct osd_it_quota *it,
301 char *buf, int buf_size,
304 const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
305 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
306 zap_cursor_t *zc = it->oiq_zc;
307 struct osd_device *osd = osd_obj2dev(it->oiq_obj);
310 rc = -zap_cursor_retrieve(zc, za);
311 if (unlikely(rc != 0))
314 if (unlikely(za->za_integer_length <= 0))
317 actual_size = za->za_integer_length * za->za_num_integers;
319 if (actual_size > buf_size) {
320 actual_size = buf_size;
/* from here on buf_size counts integers, not bytes */
321 buf_size = actual_size / za->za_integer_length;
323 buf_size = za->za_num_integers;
326 /* use correct special ID to request bytes used */
327 rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ?
328 DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL,
329 za->za_name, za->za_integer_length, buf_size, buf);
331 *bytes_read = actual_size;
337 * Fill \a dtrec with the record under iterator.
339 * \param di - osd iterator
340 * \param attr - not used
/*
 * Fill the lquota_acct_rec behind dtrec for the entry under the
 * iterator.
 *
 * Both counters are zeroed first.  bspace is read through
 * osd_zap_cursor_retrieve_value().  When
 * osd_dmu_userobj_accounting_available() reports false and bspace is
 * non-zero, ispace is estimated with osd_objset_user_iused().  The
 * remaining visible code re-reads the entry name, builds
 * DMU_OBJACCT_PREFIX + name in the per-thread oti_buf and looks that
 * key up in the accounting object's dnode.
 *
 * NOTE(review): lines are missing from this excerpt (the rc/bytes_read
 * declarations, the checks after each call, the tail of the final
 * osd_zap_lookup() call and all return statements), so the branch
 * structure and return values cannot be confirmed from here.
 */
342 static int osd_it_acct_rec(const struct lu_env *env,
343 const struct dt_it *di,
344 struct dt_rec *dtrec, __u32 attr)
346 struct osd_thread_info *info = osd_oti_get(env);
347 zap_attribute_t *za = &info->oti_za;
348 struct osd_it_quota *it = (struct osd_it_quota *)di;
349 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
350 struct osd_object *obj = it->oiq_obj;
351 struct osd_device *osd = osd_obj2dev(obj);
357 rec->ispace = rec->bspace = 0;
359 /* retrieve block usage from the DMU accounting object */
360 rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace,
361 sizeof(uint64_t), &bytes_read);
365 if (!osd_dmu_userobj_accounting_available(osd)) {
366 if (rec->bspace != 0)
367 /* estimate #inodes in use */
368 rec->ispace = osd_objset_user_iused(osd, rec->bspace);
372 /* retrieve key associated with the current cursor */
373 rc = -zap_cursor_retrieve(it->oiq_zc, za);
374 if (unlikely(rc != 0))
377 /* inode accounting is maintained by DMU since 0.7.0 */
/*
 * strncpy() copies only the prefix bytes here; the strscpy() that
 * follows appends the name and terminates the string within the
 * remaining space (assumes DMU_OBJACCT_PREFIX_LEN is the length of
 * DMU_OBJACCT_PREFIX -- TODO confirm).
 */
378 strncpy(info->oti_buf, DMU_OBJACCT_PREFIX,
379 DMU_OBJACCT_PREFIX_LEN);
380 strscpy(info->oti_buf + DMU_OBJACCT_PREFIX_LEN, za->za_name,
381 sizeof(info->oti_buf) - DMU_OBJACCT_PREFIX_LEN);
382 rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object,
383 it->oiq_obj->oo_dn, info->oti_buf, sizeof(uint64_t),
386 /* user/group has not created any file yet */
387 CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
388 osd->od_svname, info->oti_buf);
396 * Returns cookie for current Iterator position.
398 * \param di - osd iterator
/*
 * Return the position cookie of the iterator: the value produced by
 * osd_zap_cursor_serialize() for its ZAP cursor.  osd_it_acct_load()
 * accepts such a value as its hash argument.
 */
400 static __u64 osd_it_acct_store(const struct lu_env *env,
401 const struct dt_it *di)
403 struct osd_it_quota *it = (struct osd_it_quota *)di;
407 RETURN(osd_zap_cursor_serialize(it->oiq_zc));
411 * Restore iterator from cookie. if the \a hash isn't found,
412 * restore the first valid record.
414 * \param di - osd iterator
415 * \param hash - iterator location cookie
417 * \retval +ve - di points to exact matched key
418 * \retval 0 - di points to the first valid record
419 * \retval -ve - failure
/*
 * Reposition the iterator at a stored cookie.
 *
 * A new ZAP cursor is initialized on it->oiq_oid at position hash, the
 * previous cursor is finalized, and osd_zap_locate() then settles on an
 * acceptable entry.  -ENOENT from the locate step is handled as a
 * separate case.
 *
 * NOTE(review): lines are missing from this excerpt (the zc/rc
 * declarations, the check after osd_zap_cursor_init(), the assignment
 * of the new cursor to the iterator, and the values returned in each
 * case).
 */
421 static int osd_it_acct_load(const struct lu_env *env,
422 const struct dt_it *di, __u64 hash)
424 struct osd_it_quota *it = (struct osd_it_quota *)di;
425 struct osd_device *osd = osd_obj2dev(it->oiq_obj);
426 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
431 /* create new cursor pointing to the new hash */
432 rc = osd_zap_cursor_init(&zc, osd->od_os, it->oiq_oid, hash);
435 osd_zap_cursor_fini(it->oiq_zc);
439 rc = osd_zap_locate(it, za);
442 else if (rc == -ENOENT)
448 * Move Iterator to record specified by \a key, if the \a key isn't found,
449 * move to the first valid record.
451 * \param di - osd iterator
452 * \param key - uid or gid or projid
454 * \retval +ve - di points to exact matched key
455 * \retval 0 - di points to the first valid record
456 * \retval -ve - failure
/*
 * Position the iterator for a key.  Only key value 0 is supported: the
 * LASSERT enforces it and the call is forwarded to osd_it_acct_load()
 * with hash 0, whose result is returned unchanged.
 */
458 static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
459 const struct dt_key *key)
463 /* XXX: like osd_zap_it_get(), API is currently broken */
464 LASSERT(*((__u64 *)key) == 0);
466 RETURN(osd_it_acct_load(env, di, 0));
472 * \param di - osd iterator
/*
 * Iterator put hook, installed as .put in osd_acct_index_ops.
 *
 * NOTE(review): the body is not visible in this excerpt.
 */
474 static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
479 * Index and Iterator operations for accounting objects
/*
 * Operations table for accounting objects: the lookup handler plus the
 * iterator callbacks defined in this file.
 *
 * NOTE(review): the line opening the nested iterator initializer and
 * the closing braces are missing from this excerpt.
 */
481 const struct dt_index_operations osd_acct_index_ops = {
482 .dio_lookup = osd_acct_index_lookup,
484 .init = osd_it_acct_init,
485 .fini = osd_it_acct_fini,
486 .get = osd_it_acct_get,
487 .put = osd_it_acct_put,
488 .next = osd_it_acct_next,
489 .key = osd_it_acct_key,
490 .key_size = osd_it_acct_key_size,
491 .rec = osd_it_acct_rec,
492 .store = osd_it_acct_store,
493 .load = osd_it_acct_load
498 * Quota Enforcement Management
502 * Wrapper for qsd_op_begin().
504 * \param env - the environment passed by the caller
505 * \param osd - is the osd_device
506 * \param uid - user id of the inode
507 * \param gid - group id of the inode
508 * \param projid - project id of the inode
509 * \param space - how many blocks/inodes will be consumed/released
510 * \param oh - osd transaction handle
511 * \param local_flags - if the operation is write, return no user quota, no
512 * group quota, or sync commit flags to the caller
513 * \param osd_qid_declare_flags - indicate this is an inode/block accounting
514 * and whether changes are performed by root user
516 * \retval 0 - success
517 * \retval -ve - failure
/*
 * Declare a quota change for uid, gid and -- when built with
 * ZFS_PROJINHERIT and osd->od_projectused_dn is set -- projid.
 *
 * qsd_op_begin() is called once per quota type on the per-thread
 * lquota_id_info (info->oti_qi).  The quota slave instance is
 * od_quota_slave_md when OSD_QID_INODE is set in osd_qid_declare_flags,
 * otherwise od_quota_slave_dt when OSD_QID_BLK is set.  The value
 * returned at the end is the first non-zero of rcu, rcg and rcp.
 *
 * NOTE(review): lines are missing from this excerpt (the rest of the
 * `force` initializer, the statements executed under several `if`s
 * including the early returns, and the tail of the project
 * qsd_op_begin() call), so the early-exit behaviour cannot be confirmed
 * from here.
 */
519 int osd_declare_quota(const struct lu_env *env, struct osd_device *osd,
520 qid_t uid, qid_t gid, qid_t projid, long long space,
521 struct osd_thandle *oh,
522 enum osd_quota_local_flags *local_flags,
523 enum osd_qid_declare_flags osd_qid_declare_flags)
525 struct osd_thread_info *info = osd_oti_get(env);
526 struct lquota_id_info *qi = &info->oti_qi;
527 struct qsd_instance *qsd = NULL;
528 int rcu, rcg, rcp = 0; /* user & group & project rc */
529 struct thandle *th = &oh->ot_super;
530 bool force = !!(osd_qid_declare_flags & OSD_QID_FORCE) ||
534 /* very fast path for special files like llog */
535 if (uid == 0 && gid == 0 && projid == 0)
538 if (osd_qid_declare_flags & OSD_QID_INODE)
539 qsd = osd->od_quota_slave_md;
540 else if (osd_qid_declare_flags & OSD_QID_BLK)
541 qsd = osd->od_quota_slave_dt;
545 if (unlikely(qsd == NULL))
546 /* quota slave instance hasn't been allocated yet */
549 /* let's start with user quota */
550 qi->lqi_id.qid_uid = uid;
551 qi->lqi_type = USRQUOTA;
552 qi->lqi_space = space;
553 qi->lqi_is_blk = !!(osd_qid_declare_flags & OSD_QID_BLK);
554 rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, local_flags);
555 if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
556 /* ignore EDQUOT & EINPROGRESS when changes are done by root */
559 /* For non-fatal error, we want to continue to get the noquota flags
560 * for group id. This is only for commit write, which has @flags passed
561 * in. See osd_declare_write_commit().
562 * When force is set to true, we also want to proceed with the gid
564 if (rcu && (rcu != -EDQUOT || local_flags == NULL))
567 /* and now group quota */
568 qi->lqi_id.qid_gid = gid;
569 qi->lqi_type = GRPQUOTA;
570 rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, local_flags);
571 if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
572 /* as before, ignore EDQUOT & EINPROGRESS for root */
575 #ifdef ZFS_PROJINHERIT
576 if (rcg && (rcg != -EDQUOT || local_flags == NULL))
579 /* for project quota */
580 if (osd->od_projectused_dn) {
581 qi->lqi_id.qid_projid = projid;
582 qi->lqi_type = PRJQUOTA;
583 rcp = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi,
/*
 * when QUOTA_FL_ROOT_PRJQUOTA is set in *local_flags, force is
 * replaced by the transaction's th_ignore_quota
 */
585 if (local_flags && *local_flags & QUOTA_FL_ROOT_PRJQUOTA)
586 force = th->th_ignore_quota;
587 if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS))
592 RETURN(rcu ? rcu : (rcg ? rcg : rcp));