4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2015, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann@whamcloud.com>
30 #include <lustre_quota.h>
32 #include "osd_internal.h"
35 * Helper function to retrieve DMU object id from fid for accounting object
37 dnode_t *osd_quota_fid2dmu(const struct osd_device *osd,
38 const struct lu_fid *fid)
40 LASSERT(fid_is_acct(fid));
41 if (fid_oid(fid) == ACCT_GROUP_OID)
42 return osd->od_groupused_dn;
43 return osd->od_userused_dn;
47 * Helper function to estimate the number of inodes in use for a given uid/gid
48 * from the block usage
/*
 * Estimate how many objects an id owns from its byte usage \a uidbytes.
 * Fetches the current objset totals with dmu_objset_space(), derives a block
 * shift from osd->od_max_blksz and passes the totals plus the id's usage
 * (expressed in blocks) to osd_objs_count_estimate().  The estimate is then
 * raised to at least 1 by the max_t() clamp.
 * NOTE(review): the opening brace, any condition guarding the clamp and the
 * final return statement are not visible in this listing - confirm against
 * the complete file.
 */
50 static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes)
52 uint64_t refdbytes, availbytes, usedobjs, availobjs;
53 uint64_t uidobjs, bshift;
55 /* get fresh statfs info */
56 dmu_objset_space(osd->od_os, &refdbytes, &availbytes,
57 &usedobjs, &availobjs);
59 /* estimate the number of objects based on the disk usage */
/* fls64() - 1 is the index of the highest set bit, i.e. log2 of
 * od_max_blksz if that value is a power of two (TODO confirm) */
60 bshift = fls64(osd->od_max_blksz) - 1;
61 uidobjs = osd_objs_count_estimate(refdbytes, usedobjs,
62 uidbytes >> bshift, bshift);
64 /* if we have at least 1 byte, we have at least one dnode ... */
65 uidobjs = max_t(uint64_t, uidobjs, 1);
71 * Space Accounting Management
75 * Return space usage consumed by a given uid or gid.
76 * Block usage is accurate since it is maintained by DMU itself.
77 * However, DMU does not provide inode accounting, so the #inodes in use
78 * is estimated from the block usage and statfs information.
80 * \param env - is the environment passed by the caller
81 * \param dtobj - is the accounting object
82 * \param dtrec - is the record to fill with space usage information
83 * \param dtkey - is the id of the user or group for which we would
84 * like to access disk usage.
86 * \retval +ve - success : exact match
87 * \retval -ve - failure
/*
 * dio_lookup handler for accounting objects (wired in osd_acct_index_ops).
 * Zeroes the record, formats the 64-bit id behind \a dtkey as a hex string
 * and looks that name up in the accounting dnode selected by
 * osd_quota_fid2dmu() to fill rec->bspace.  When
 * osd_dmu_userobj_accounting_available() is false and bspace is non-zero,
 * rec->ispace is estimated with osd_objset_user_iused().  A second lookup,
 * keyed by OSD_DMU_USEROBJ_PREFIX followed by the same hex id, reads
 * rec->ispace directly; presumably this is the else branch of that check.
 * NOTE(review): the dtrec parameter line, the declarations of dn and rc,
 * the else keyword and all return-code handling after the two lookups are
 * not visible in this listing - confirm against the complete file.
 */
89 static int osd_acct_index_lookup(const struct lu_env *env,
90 struct dt_object *dtobj,
92 const struct dt_key *dtkey)
/* buf is the per-thread scratch buffer used to build ZAP key names */
94 struct osd_thread_info *info = osd_oti_get(env);
95 char *buf = info->oti_buf;
96 size_t buflen = sizeof(info->oti_buf);
97 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
98 struct osd_object *obj = osd_dt_obj(dtobj);
99 struct osd_device *osd = osd_obj2dev(obj);
/* start from a zeroed record before the lookups below fill it in */
104 rec->bspace = rec->ispace = 0;
106 /* convert the 64-bit uid/gid into a string */
107 snprintf(buf, buflen, "%llx", *((__u64 *)dtkey));
108 /* fetch DMU object (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be
110 dn = osd_quota_fid2dmu(osd, lu_object_fid(&dtobj->do_lu));
112 /* disk usage (in bytes) is maintained by DMU.
113 * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
114 * not associated with any dmu_but_t (see dnode_special_open()). */
115 rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1,
118 /* user/group has not created anything yet */
119 CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n",
120 osd->od_svname, buf);
125 if (!osd_dmu_userobj_accounting_available(osd)) {
126 if (rec->bspace != 0)
127 /* estimate #inodes in use */
128 rec->ispace = osd_objset_user_iused(osd, rec->bspace);
/* rebuild the key with the OSD_DMU_USEROBJ_PREFIX prefix; the value stored
 * under that name is read into rec->ispace */
131 snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx",
133 rc = osd_zap_lookup(osd, dn->dn_object, dn, buf,
134 sizeof(uint64_t), 1, &rec->ispace);
137 "%s: id %s not found dnode accounting\n",
138 osd->od_svname, buf);
139 } else if (rc == 0) {
148 * Initialize osd Iterator for given osd index object.
150 * \param dt - osd index object
151 * \param attr - not used
/*
 * .init callback of the accounting iterator (see osd_acct_index_ops).
 * Asserts that the object exists, zeroes the iterator, stores the object id
 * of the accounting dnode chosen by osd_quota_fid2dmu() in it->oiq_oid,
 * initialises a ZAP cursor on that id with a starting position of 0 and
 * records the osd object in it->oiq_obj.  On success the iterator is
 * returned cast to struct dt_it *; two paths return ERR_PTR(-ENOMEM).
 * NOTE(review): the attr parameter line, the declarations of dn and rc, the
 * way `it` is obtained, the conditions guarding both -ENOMEM returns, the
 * handling of a cursor-init failure and the call that actually takes the
 * object reference are not visible in this listing.
 */
153 static struct dt_it *osd_it_acct_init(const struct lu_env *env,
154 struct dt_object *dt,
157 struct osd_thread_info *info = osd_oti_get(env);
158 struct osd_it_quota *it;
159 struct lu_object *lo = &dt->do_lu;
160 struct osd_device *osd = osd_dev(lo->lo_dev);
165 LASSERT(lu_object_exists(lo));
168 RETURN(ERR_PTR(-ENOMEM));
172 RETURN(ERR_PTR(-ENOMEM));
174 memset(it, 0, sizeof(*it));
/* the iterator walks the DMU accounting object matching this fid */
175 dn = osd_quota_fid2dmu(osd, lu_object_fid(lo));
176 it->oiq_oid = dn->dn_object;
178 /* initialize zap cursor */
179 rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0);
185 /* take object reference */
187 it->oiq_obj = osd_dt_obj(dt);
190 RETURN((struct dt_it *)it);
194 * Free given iterator.
196 * \param di - osd iterator
/*
 * .fini callback of the accounting iterator: releases the ZAP cursor with
 * osd_zap_cursor_fini() and drops the reference on it->oiq_obj with
 * osd_object_put().
 * NOTE(review): whether the iterator memory itself is released here is not
 * visible in this listing.
 */
198 static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
200 struct osd_it_quota *it = (struct osd_it_quota *)di;
203 osd_zap_cursor_fini(it->oiq_zc);
204 osd_object_put(env, it->oiq_obj);
211 * Move on to the next valid entry.
213 * \param di - osd iterator
215 * \retval +ve - iterator reached the end
216 * \retval 0 - iterator has not reached the end yet
217 * \retval -ve - unexpected failure
/*
 * .next callback of the accounting iterator.  The cursor is advanced only
 * when it->oiq_reset is 0; the entry under the cursor is then fetched into
 * the per-thread zap_attribute_t.  The result of zap_cursor_retrieve() is
 * negated before being stored in rc, and -ENOENT is treated as the end of
 * the ZAP.
 * NOTE(review): the declaration of rc, the value returned for -ENOENT and
 * any update of oiq_reset are not visible in this listing.
 */
219 static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
221 struct osd_it_quota *it = (struct osd_it_quota *)di;
222 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
/* oiq_reset != 0 means the cursor must not be advanced on this call */
226 if (it->oiq_reset == 0)
227 zap_cursor_advance(it->oiq_zc);
229 rc = -zap_cursor_retrieve(it->oiq_zc, za);
230 if (rc == -ENOENT) /* reached the end */
236 * Return pointer to the key under iterator.
238 * \param di - osd iterator
/*
 * .key callback of the accounting iterator.  Retrieves the entry under the
 * cursor, parses its name (za->za_name) as a base-16 number into
 * it->oiq_id with kstrtoull() and returns the address of it->oiq_id as the
 * key.  The returned pointer therefore refers to storage inside the
 * iterator.
 * NOTE(review): the declaration of rc and the handling of a failed
 * retrieve or a failed parse are not visible in this listing.
 */
240 static struct dt_key *osd_it_acct_key(const struct lu_env *env,
241 const struct dt_it *di)
243 struct osd_it_quota *it = (struct osd_it_quota *)di;
244 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
249 rc = -zap_cursor_retrieve(it->oiq_zc, za);
/* entry names are hex strings, matching the "%llx" format used by
 * osd_acct_index_lookup() */
252 rc = kstrtoull(za->za_name, 16, &it->oiq_id);
254 RETURN((struct dt_key *) &it->oiq_id);
258 * Return size of key under iterator (in bytes)
260 * \param di - osd iterator
/*
 * .key_size callback of the accounting iterator: always reports
 * sizeof(uint64_t); neither argument is used in the visible code.
 */
262 static int osd_it_acct_key_size(const struct lu_env *env,
263 const struct dt_it *di)
266 RETURN((int)sizeof(uint64_t));
270 * zap_cursor_retrieve() reads from the current record;
271 * to read the value bytes we need to call zap_lookup() explicitly.
/*
 * Read the value of the ZAP entry under the iterator's cursor into \a buf.
 * The entry's attributes are obtained with zap_cursor_retrieve(); the value
 * size is za_integer_length * za_num_integers and is capped to \a buf_size.
 * buf_size is then reused as the number of integers passed to
 * osd_zap_lookup(), which fetches the value by name from
 * DMU_GROUPUSED_OBJECT or DMU_USERUSED_OBJECT depending on the fid of
 * it->oiq_obj.  The (possibly capped) byte count is stored in *bytes_read.
 * NOTE(review): the bytes_read parameter line, the declarations of rc and
 * actual_size, the returns taken on the unlikely() paths, the else keyword
 * before the second buf_size assignment and the condition guarding the
 * *bytes_read store are not visible in this listing.
 */
273 static int osd_zap_cursor_retrieve_value(const struct lu_env *env,
274 struct osd_it_quota *it,
275 char *buf, int buf_size,
278 const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
279 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
280 zap_cursor_t *zc = it->oiq_zc;
281 struct osd_device *osd = osd_obj2dev(it->oiq_obj);
284 rc = -zap_cursor_retrieve(zc, za);
285 if (unlikely(rc != 0))
/* a non-positive integer length would make the division below invalid */
288 if (unlikely(za->za_integer_length <= 0))
291 actual_size = za->za_integer_length * za->za_num_integers;
/* from here on buf_size counts integers rather than bytes */
293 if (actual_size > buf_size) {
294 actual_size = buf_size;
295 buf_size = actual_size / za->za_integer_length;
297 buf_size = za->za_num_integers;
300 /* use correct special ID to request bytes used */
301 rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ?
302 DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL,
303 za->za_name, za->za_integer_length, buf_size, buf);
305 *bytes_read = actual_size;
311 * Return pointer to the record under iterator.
313 * \param di - osd iterator
314 * \param attr - not used
/*
 * .rec callback of the accounting iterator: fill \a dtrec with the usage of
 * the id under the cursor.  Both fields are zeroed first.  rec->bspace is
 * read through osd_zap_cursor_retrieve_value().  When
 * osd_dmu_userobj_accounting_available() is false and bspace is non-zero,
 * rec->ispace is estimated with osd_objset_user_iused().  The remaining
 * visible code retrieves the entry name again and looks it up in the ZAP of
 * it->oiq_obj->oo_dn.
 * NOTE(review): the declarations of rc and bytes_read, the error handling
 * after each call, the exit taken from the estimation branch, the final
 * arguments of the last osd_zap_lookup() and the return value are not
 * visible in this listing.
 */
316 static int osd_it_acct_rec(const struct lu_env *env,
317 const struct dt_it *di,
318 struct dt_rec *dtrec, __u32 attr)
320 struct osd_thread_info *info = osd_oti_get(env);
321 zap_attribute_t *za = &info->oti_za;
322 struct osd_it_quota *it = (struct osd_it_quota *)di;
323 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
324 struct osd_object *obj = it->oiq_obj;
325 struct osd_device *osd = osd_obj2dev(obj);
331 rec->ispace = rec->bspace = 0;
333 /* retrieve block usage from the DMU accounting object */
334 rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace,
335 sizeof(uint64_t), &bytes_read);
339 if (!osd_dmu_userobj_accounting_available(osd)) {
340 if (rec->bspace != 0)
341 /* estimate #inodes in use */
342 rec->ispace = osd_objset_user_iused(osd, rec->bspace);
346 /* retrieve key associated with the current cursor */
347 rc = -zap_cursor_retrieve(it->oiq_zc, za);
348 if (unlikely(rc != 0))
351 /* inode accounting is not maintained by DMU, so we use our own ZAP to
352 * track inode usage */
353 rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object,
354 it->oiq_obj->oo_dn, za->za_name, sizeof(uint64_t),
357 /* user/group has not created any file yet */
358 CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
359 osd->od_svname, za->za_name);
367 * Returns cookie for current Iterator position.
369 * \param di - osd iterator
/*
 * .store callback of the accounting iterator: the cookie is the value
 * returned by osd_zap_cursor_serialize() for the iterator's current cursor.
 */
371 static __u64 osd_it_acct_store(const struct lu_env *env,
372 const struct dt_it *di)
374 struct osd_it_quota *it = (struct osd_it_quota *)di;
377 RETURN(osd_zap_cursor_serialize(it->oiq_zc));
381 * Restore iterator from cookie. If the \a hash isn't found,
382 * restore the first valid record.
384 * \param di - osd iterator
385 * \param hash - iterator location cookie
387 * \retval +ve - di points to exact matched key
388 * \retval 0 - di points to the first valid record
389 * \retval -ve - failure
/*
 * .load callback of the accounting iterator.  Initialises a new ZAP cursor
 * on it->oiq_oid at position \a hash, releases the iterator's previous
 * cursor with osd_zap_cursor_fini() and then probes the entry under
 * it->oiq_zc with zap_cursor_retrieve(), negating its result into rc.
 * NOTE(review): the declarations of zc and rc, the check of the cursor-init
 * result, the assignment of the new cursor to it->oiq_zc, any update of
 * oiq_reset and the values returned for rc == 0 and rc == -ENOENT are not
 * visible in this listing.
 */
391 static int osd_it_acct_load(const struct lu_env *env,
392 const struct dt_it *di, __u64 hash)
394 struct osd_it_quota *it = (struct osd_it_quota *)di;
395 struct osd_device *osd = osd_obj2dev(it->oiq_obj);
396 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
401 /* create new cursor pointing to the new hash */
402 rc = osd_zap_cursor_init(&zc, osd->od_os, it->oiq_oid, hash);
405 osd_zap_cursor_fini(it->oiq_zc);
409 rc = -zap_cursor_retrieve(it->oiq_zc, za);
412 else if (rc == -ENOENT)
419 * Move Iterator to record specified by \a key, if the \a key isn't found,
420 * move to the first valid record.
422 * \param di - osd iterator
423 * \param key - uid or gid
425 * \retval +ve - di points to exact matched key
426 * \retval 0 - di points to the first valid record
427 * \retval -ve - failure
/*
 * .get callback of the accounting iterator.  Only a key value of 0 is
 * accepted (enforced by the LASSERT below); positioning is delegated to
 * osd_it_acct_load() with a hash of 0, and its result is returned as is.
 */
429 static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
430 const struct dt_key *key)
434 /* XXX: like osd_zap_it_get(), API is currently broken */
435 LASSERT(*((__u64 *)key) == 0);
437 RETURN(osd_it_acct_load(env, di, 0));
443 * \param di - osd iterator
/*
 * .put callback of the accounting iterator (see osd_acct_index_ops).
 * NOTE(review): no statements of its body are visible in this listing.
 */
445 static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
450 * Index and Iterator operations for accounting objects
/*
 * Operation table for accounting objects: one lookup handler plus the
 * iterator callbacks.  Every handler named here is defined earlier in this
 * file.
 * NOTE(review): the line opening the nested iterator initializer and the
 * closing braces are not visible in this listing.
 */
452 const struct dt_index_operations osd_acct_index_ops = {
453 .dio_lookup = osd_acct_index_lookup,
455 .init = osd_it_acct_init,
456 .fini = osd_it_acct_fini,
457 .get = osd_it_acct_get,
458 .put = osd_it_acct_put,
459 .next = osd_it_acct_next,
460 .key = osd_it_acct_key,
461 .key_size = osd_it_acct_key_size,
462 .rec = osd_it_acct_rec,
463 .store = osd_it_acct_store,
464 .load = osd_it_acct_load
469 * Quota Enforcement Management
473 * Wrapper for qsd_op_begin().
475 * \param env - the environment passed by the caller
476 * \param osd - is the osd_device
477 * \param uid - user id of the inode
478 * \param gid - group id of the inode
479 * \param space - how many blocks/inodes will be consumed/released
480 * \param oh - osd transaction handle
481 * \param is_blk - block quota or inode quota?
482 * \param flags - if the operation is write, return no user quota, no
483 * group quota, or sync commit flags to the caller
484 * \param force - set to 1 when changes are performed by root user and thus
485 * can't fail with EDQUOT
487 * \retval 0 - success
488 * \retval -ve - failure
/*
 * Declare quota usage for an operation, first against \a uid and then
 * against \a gid.  The per-thread lquota_id_info (info->oti_qi) is filled
 * in and qsd_op_begin() is called once per quota type on
 * oh->ot_quota_trans, with \a flags passed through to both calls.  The user
 * result takes precedence in the return value: rcu if non-zero, else rcg.
 * According to the inline comments, -EDQUOT and -EINPROGRESS are ignored
 * when \a force is set, and processing continues to the group id after a
 * user -EDQUOT only when \a flags is non-NULL.
 * NOTE(review): the force parameter line, the early return taken when qsd
 * is NULL, the statements that clear rcu/rcg under force and the return
 * taken after a fatal user-quota error are not visible in this listing.
 */
490 int osd_declare_quota(const struct lu_env *env, struct osd_device *osd,
491 qid_t uid, qid_t gid, long long space,
492 struct osd_thandle *oh, bool is_blk, int *flags,
495 struct osd_thread_info *info = osd_oti_get(env);
496 struct lquota_id_info *qi = &info->oti_qi;
497 struct qsd_instance *qsd = osd->od_quota_slave;
498 int rcu, rcg; /* user & group rc */
501 if (unlikely(qsd == NULL))
502 /* quota slave instance hasn't been allocated yet */
505 /* let's start with user quota */
506 qi->lqi_id.qid_uid = uid;
507 qi->lqi_type = USRQUOTA;
508 qi->lqi_space = space;
509 qi->lqi_is_blk = is_blk;
510 rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
512 if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
513 /* ignore EDQUOT & EINPROGRESS when changes are done by root */
516 /* For non-fatal error, we want to continue to get the noquota flags
517 * for group id. This is only for commit write, which has @flags passed
518 * in. See osd_declare_write_commit().
519 * When force is set to true, we also want to proceed with the gid */
520 if (rcu && (rcu != -EDQUOT || flags == NULL))
523 /* and now group quota */
/* only the id and type are replaced; lqi_space and lqi_is_blk set for the
 * user pass are reused unchanged */
524 qi->lqi_id.qid_gid = gid;
525 qi->lqi_type = GRPQUOTA;
526 rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
528 if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
529 /* as before, ignore EDQUOT & EINPROGRESS for root */
532 RETURN(rcu ? rcu : rcg);