4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann@whamcloud.com>
30 #include <lustre_quota.h>
32 #include "osd_internal.h"
34 #include <sys/dnode.h>
37 #include <sys/dmu_tx.h>
38 #include <sys/dsl_prop.h>
42 * the structure tracks per-ID change/state
/* Per-uid/gid accounting record kept in a cfs_hash table.
 * NOTE(review): the id field (zic_id) and atomic counter (zic_num) used
 * elsewhere in this file are not visible in this view — confirm layout. */
44 struct zfs_id_change {
/* linkage into the per-OSD state hash or the per-txg delta hash */
45 struct hlist_node zic_hash;
51 * callback data for cfs_hash_for_each_safe()
52 * used in txg commit and OSD cleanup path
/* device whose accounting ZAPs the iteration callbacks operate on */
55 struct osd_device *hcb_osd;
61 * Helper function to retrieve DMU object id from fid for accounting object
/* Maps an accounting FID to the corresponding DMU special object:
 * ACCT_GROUP_OID -> DMU_GROUPUSED_OBJECT, anything else -> user usage. */
63 static inline uint64_t osd_quota_fid2dmu(const struct lu_fid *fid)
/* only accounting FIDs are legal here */
65 LASSERT(fid_is_acct(fid));
66 if (fid_oid(fid) == ACCT_GROUP_OID)
67 return DMU_GROUPUSED_OBJECT;
68 return DMU_USERUSED_OBJECT;
72 * a note about locking:
73 * entries in per-OSD cache never go before umount,
74 * so there is no need for locking during lookups.
76 * entries in per-txg deltas never go before txg is closed,
77 * there is no concurrency between removal/insertions.
79 * also, given all the above, there is no need for reference counting.
/* Look up the cached accounting record for @id in @hash; returns NULL when
 * the id has no entry yet.  Lock-free per the locking notes above. */
81 static struct zfs_id_change *osd_zfs_lookup_by_id(cfs_hash_t *hash, __u64 id)
83 struct zfs_id_change *za = NULL;
84 struct hlist_node *hnode;
/* locate the bucket for this id, then peek without taking bucket locks */
87 cfs_hash_bd_get(hash, &id, &bd);
88 hnode = cfs_hash_bd_peek_locked(hash, &bd, &id);
/* container_of0() handles a NULL hnode, yielding NULL */
90 za = container_of0(hnode, struct zfs_id_change, zic_hash);
/* Return the record for @id, allocating and inserting a new one if absent.
 * Insertion is serialized by od_known_txg_lock; on a lost race the freshly
 * allocated record is discarded and the winner's entry is returned. */
95 static struct zfs_id_change *lookup_or_create_by_id(struct osd_device *osd,
96 cfs_hash_t *hash, __u64 id)
98 struct zfs_id_change *za, *tmp;
99 struct hlist_node *hnode;
/* fast path: already cached */
102 za = osd_zfs_lookup_by_id(hash, id);
103 if (likely(za != NULL))
/* allocation failure (allocation itself not visible in this view) */
107 if (unlikely(za == NULL))
112 cfs_hash_bd_get(hash, &id, &bd);
113 spin_lock(&osd->od_known_txg_lock);
/* findadd: atomically insert our node unless the key already exists */
114 hnode = cfs_hash_bd_findadd_locked(hash, &bd, &id, &za->zic_hash, 1);
115 LASSERT(hnode != NULL);
116 tmp = container_of0(hnode, struct zfs_id_change, zic_hash);
117 spin_unlock(&osd->od_known_txg_lock);
121 * our structure got into the hash
124 /* somebody won the race, we wasted the cycles */
132 * used to maintain per-txg deltas
/* Accumulate @delta for @id in the per-txg delta hash.  Must run inside the
 * txg that installed od_acct_delta (asserted below). */
134 static int osd_zfs_acct_id(const struct lu_env *env, cfs_hash_t *hash,
135 __u64 id, int delta, struct osd_thandle *oh)
137 struct osd_device *osd = osd_dt_dev(oh->ot_super.th_dev);
138 struct zfs_id_change *za;
/* the per-txg structure must have been set up by acct_trans_start() */
142 LASSERT(oh->ot_tx->tx_txg == osd->od_known_txg);
143 LASSERT(osd->od_acct_delta != NULL);
145 za = lookup_or_create_by_id(osd, hash, id);
146 if (unlikely(za == NULL))
/* atomic: multiple threads may update the same id concurrently */
149 atomic_add(delta, &za->zic_num);
155 * this function is used to maintain current state for given ID:
156 * at the beginning it initializes the cache from corresponding ZAP
/* Ensure the per-OSD cache has an entry for @id (seeding it from the on-disk
 * ZAP @oid on first use), then apply @delta to the cached value. */
158 static void osd_zfs_acct_cache_init(const struct lu_env *env,
159 struct osd_device *osd,
160 cfs_hash_t *hash, __u64 oid,
162 struct osd_thandle *oh)
164 char *buf = osd_oti_get(env)->oti_buf;
165 struct hlist_node *hnode;
167 struct zfs_id_change *za, *tmp;
/* fast path: id already cached for this OSD */
171 za = osd_zfs_lookup_by_id(hash, id);
172 if (likely(za != NULL))
176 * any concurrent thread is running in the same txg, so no on-disk
177 * accounting ZAP can be modified until this txg is closed
178 * thus all the concurrent threads must be getting the same value
179 * from that ZAP and we don't need to serialize lookups
/* accounting ZAPs are keyed by the hex representation of the id */
181 snprintf(buf, sizeof(osd_oti_get(env)->oti_buf), "%llx", id);
182 /* XXX: we should be using zap_lookup_int_key(), but it consumes
183 * 20 bytes on the stack for buf .. */
184 rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1, &v);
187 } else if (unlikely(rc != 0)) {
188 CERROR("%s: can't access accounting zap %llu\n",
189 osd->od_svname, oid);
194 if (unlikely(za == NULL)) {
195 CERROR("%s: can't allocate za\n", osd->od_svname);
/* seed the cached counter with the on-disk value */
200 atomic_set(&za->zic_num, v);
202 cfs_hash_bd_get(hash, &id, &bd);
203 spin_lock(&osd->od_known_txg_lock);
/* race-safe insertion, same pattern as lookup_or_create_by_id() */
204 hnode = cfs_hash_bd_findadd_locked(hash, &bd, &id, &za->zic_hash, 1);
205 LASSERT(hnode != NULL);
206 tmp = container_of0(hnode, struct zfs_id_change, zic_hash);
207 spin_unlock(&osd->od_known_txg_lock);
210 /* our structure got into the hash */
212 /* there was no entry in ZAP yet, we have
213 * to initialize with 0, so that accounting
214 * reports can find that and then find our
217 rc = -zap_update(osd->od_objset.os, oid, buf,
218 sizeof(uint64_t), 1, &v, oh->ot_tx);
219 if (unlikely(rc != 0))
220 CERROR("%s: can't initialize: rc = %d\n",
224 /* somebody won the race, we wasted the cycles */
/* finally apply the caller's delta to the cached state */
231 atomic_add(delta, &za->zic_num);
/* cfs_hash hash callback: bucket index derived from the low 32 bits of the
 * 64-bit uid/gid key.
 * NOTE(review): cfs_hash masks are conventionally 2^n-1 and combined with
 * `& mask`; `% mask` here never yields bucket `mask` itself — confirm this
 * was intended rather than `& mask`. */
234 static __u32 acct_hashfn(cfs_hash_t *hash_body, const void *key, unsigned mask)
236 const __u64 *id = key;
239 result = (__u32) *id;
240 return result % mask;
/* cfs_hash key callback: return a pointer to the entry's id key. */
243 static void *acct_key(struct hlist_node *hnode)
245 struct zfs_id_change *ac;
247 ac = hlist_entry(hnode, struct zfs_id_change, zic_hash);
/* cfs_hash keycmp callback: non-zero when @key matches the entry's id. */
251 static int acct_hashkey_keycmp(const void *key,
252 struct hlist_node *compared_hnode)
254 struct zfs_id_change *ac;
255 const __u64 *id = key;
257 ac = hlist_entry(compared_hnode, struct zfs_id_change, zic_hash);
258 return *id == ac->zic_id;
/* cfs_hash object callback: map a hash node back to its zfs_id_change. */
261 static void *acct_hashobject(struct hlist_node *hnode)
263 return hlist_entry(hnode, struct zfs_id_change, zic_hash);
266 static cfs_hash_ops_t acct_hash_operations = {
267 .hs_hash = acct_hashfn,
269 .hs_keycmp = acct_hashkey_keycmp,
270 .hs_object = acct_hashobject,
273 #define ACCT_HASH_OPS (CFS_HASH_NO_LOCK|CFS_HASH_NO_ITEMREF|CFS_HASH_ADD_TAIL)
/* Create the per-OSD user/group accounting caches at mount time.
 * Returns 0 on success, -ENOMEM if either hash cannot be created
 * (cleanup path not visible in this view). */
275 int osd_zfs_acct_init(const struct lu_env *env, struct osd_device *o)
280 spin_lock_init(&o->od_known_txg_lock);
282 /* global structure representing current state for given ID */
283 o->od_acct_usr = cfs_hash_create("usr", 4, 4, 4, 0, 0, 0,
284 &acct_hash_operations,
286 if (o->od_acct_usr == NULL)
287 GOTO(out, rc = -ENOMEM);
/* same again for group ids */
289 o->od_acct_grp = cfs_hash_create("grp", 4, 4, 4, 0, 0, 0,
290 &acct_hash_operations,
292 if (o->od_acct_grp == NULL)
293 GOTO(out, rc = -ENOMEM);
/* cfs_hash_for_each_safe() callback used at umount: optionally cross-check
 * the cached counter against the on-disk ZAP (pre-2.5.70 debug build only),
 * then remove the entry from the hash. */
299 static int osd_zfs_delete_item(cfs_hash_t *hs, cfs_hash_bd_t *bd,
300 struct hlist_node *node, void *data)
302 struct hash_cbdata *d = data;
303 struct zfs_id_change *za;
308 za = hlist_entry(node, struct zfs_id_change, zic_hash);
311 * XXX: should we try to fix accounting we failed to update before?
313 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 5, 70, 0)
315 * extra checks to ensure our cache matches on-disk state
317 snprintf(buf, sizeof(buf), "%llx", za->zic_id);
318 rc = -zap_lookup(d->hcb_osd->od_objset.os, d->hcb_zapid,
319 buf, sizeof(uint64_t), 1, &v);
320 /* pairs with zero value are removed by ZAP automatically */
/* mismatch between cache and disk indicates an accounting bug */
323 if (atomic_read(&za->zic_num) != v) {
324 CERROR("%s: INVALID ACCOUNTING FOR %llu %d != %lld: rc = %d\n",
325 d->hcb_osd->od_svname, za->zic_id,
326 atomic_read(&za->zic_num), v, rc);
327 /* XXX: to catch with automated testing */
331 #warning "remove this additional check before release"
/* drop the entry; freeing of za is not visible in this view */
334 cfs_hash_bd_del_locked(hs, bd, node);
/* Tear down both per-OSD accounting caches at umount, draining every cached
 * entry through osd_zfs_delete_item() before dropping the hash references. */
340 void osd_zfs_acct_fini(const struct lu_env *env, struct osd_device *o)
342 struct hash_cbdata cbdata;
346 /* release object accounting cache (owners) */
347 cbdata.hcb_zapid = o->od_iusr_oid;
349 if (o->od_acct_usr) {
350 cfs_hash_for_each_safe(o->od_acct_usr, osd_zfs_delete_item,
352 cfs_hash_putref(o->od_acct_usr);
353 o->od_acct_usr = NULL;
356 /* release object accounting cache (groups) */
357 cbdata.hcb_zapid = o->od_igrp_oid;
359 if (o->od_acct_grp) {
360 cfs_hash_for_each_safe(o->od_acct_grp, osd_zfs_delete_item,
362 cfs_hash_putref(o->od_acct_grp);
363 o->od_acct_grp = NULL;
/* cfs_hash_for_each_safe() callback run at txg commit: flush one cached
 * per-txg delta into the on-disk accounting ZAP, then drop the entry. */
367 static int osd_zfs_commit_item(cfs_hash_t *hs, cfs_hash_bd_t *bd,
368 struct hlist_node *node, void *data)
370 struct hash_cbdata *d = data;
371 struct osd_device *osd = d->hcb_osd;
372 struct zfs_id_change *za;
375 za = hlist_entry(node, struct zfs_id_change, zic_hash);
/* add the accumulated delta to the id's on-disk counter in this txg */
377 rc = -zap_increment_int(osd->od_objset.os, d->hcb_zapid, za->zic_id,
378 atomic_read(&za->zic_num), d->hcb_tx);
379 if (unlikely(rc != 0))
380 CERROR("%s: quota update for UID "LPU64" failed: rc = %d\n",
381 osd->od_svname, za->zic_id, rc);
383 cfs_hash_bd_del_locked(hs, bd, node);
390 * this function is called as part of txg commit procedure,
391 * no more normal changes are allowed to this txg.
392 * we go over all the changes cached in per-txg structure
393 * and apply them to actual ZAPs
/* signature differs with the ZFS API generation (extra arg2 on older API) */
395 #ifdef HAVE_DSL_SYNC_TASK_DO_NOWAIT
396 static void osd_zfs_acct_update(void *arg, void *arg2, dmu_tx_t *tx)
398 static void osd_zfs_acct_update(void *arg, dmu_tx_t *tx)
401 struct osd_zfs_acct_txg *zat = arg;
402 struct osd_device *osd = zat->zat_osd;
403 struct hash_cbdata cbdata;
405 cbdata.hcb_osd = osd;
408 CDEBUG(D_OTHER, "COMMIT %llu on %s\n", tx->tx_txg, osd->od_svname);
410 /* apply changes related to the owners */
411 cbdata.hcb_zapid = osd->od_iusr_oid;
412 cfs_hash_for_each_safe(zat->zat_usr, osd_zfs_commit_item, &cbdata);
414 /* apply changes related to the groups */
415 cbdata.hcb_zapid = osd->od_igrp_oid;
416 cfs_hash_for_each_safe(zat->zat_grp, osd_zfs_commit_item, &cbdata);
/* per-txg hashes are single-use: drop them once flushed */
418 cfs_hash_putref(zat->zat_usr);
419 cfs_hash_putref(zat->zat_grp);
/* compat shim so the rest of the file can call dsl_sync_task_nowait() */
424 #ifdef HAVE_DSL_SYNC_TASK_DO_NOWAIT
425 #define dsl_sync_task_nowait(pool, func, arg, blocks, tx) \
426 dsl_sync_task_do_nowait(pool, NULL, func, arg, NULL, blocks, tx)
430 * if any change to the object accounting is going to happen,
431 * we create one structure per txg to track all the changes
432 * and register special routine to be called as part of txg
/* Lazily set up the per-txg delta structure (two hashes + a commit callback)
 * the first time accounting changes in a txg; later callers in the same txg
 * take the fast path.  Returns 0 or -ENOMEM. */
435 int osd_zfs_acct_trans_start(const struct lu_env *env, struct osd_thandle *oh)
437 struct osd_device *osd = osd_dt_dev(oh->ot_super.th_dev);
438 struct osd_zfs_acct_txg *ac = NULL;
439 int rc = 0, add_work = 0;
441 if (likely(oh->ot_tx->tx_txg == osd->od_known_txg)) {
442 /* already created */
447 if (unlikely(ac == NULL))
450 ac->zat_usr = cfs_hash_create("usr", 4, 4, 4, 0, 0, 0,
451 &acct_hash_operations,
453 if (unlikely(ac->zat_usr == NULL)) {
454 CERROR("%s: can't allocate hash for accounting\n",
456 GOTO(out, rc = -ENOMEM);
459 ac->zat_grp = cfs_hash_create("grp", 4, 4, 4, 0, 0, 0,
460 &acct_hash_operations,
462 if (unlikely(ac->zat_grp == NULL)) {
463 CERROR("%s: can't allocate hash for accounting\n",
465 GOTO(out, rc = -ENOMEM);
/* race: only the first thread in this txg installs the new structure */
468 spin_lock(&osd->od_known_txg_lock);
469 if (oh->ot_tx->tx_txg != osd->od_known_txg) {
470 osd->od_acct_delta = ac;
471 osd->od_known_txg = oh->ot_tx->tx_txg;
474 spin_unlock(&osd->od_known_txg_lock);
476 /* schedule a callback to be run in the context of txg
477 * once the latter is closed and syncing */
479 spa_t *spa = dmu_objset_spa(osd->od_objset.os);
480 LASSERT(ac->zat_osd == NULL);
482 dsl_sync_task_nowait(spa_get_dsl(spa),
486 /* not to be freed now */
492 /* another thread has installed new structure already */
494 cfs_hash_putref(ac->zat_usr);
496 cfs_hash_putref(ac->zat_grp);
/* Record a +/- @delta change in inode accounting for @uid: ensures the
 * per-txg machinery exists, then updates both the per-OSD cached state and
 * the per-txg delta. */
503 void osd_zfs_acct_uid(const struct lu_env *env, struct osd_device *osd,
504 __u64 uid, int delta, struct osd_thandle *oh)
508 /* add per-txg job to update accounting */
509 rc = osd_zfs_acct_trans_start(env, oh);
510 if (unlikely(rc != 0))
513 /* maintain per-OSD cached value */
514 osd_zfs_acct_cache_init(env, osd, osd->od_acct_usr,
515 osd->od_iusr_oid, uid, delta, oh);
517 /* maintain per-TXG delta */
518 osd_zfs_acct_id(env, osd->od_acct_delta->zat_usr, uid, delta, oh);
/* Group-id counterpart of osd_zfs_acct_uid(): same flow against the group
 * cache/ZAP/delta hash. */
522 void osd_zfs_acct_gid(const struct lu_env *env, struct osd_device *osd,
523 __u64 gid, int delta, struct osd_thandle *oh)
527 /* add per-txg job to update accounting */
528 rc = osd_zfs_acct_trans_start(env, oh);
529 if (unlikely(rc != 0))
532 /* maintain per-OSD cached value */
533 osd_zfs_acct_cache_init(env, osd, osd->od_acct_grp,
534 osd->od_igrp_oid, gid, delta, oh);
536 /* maintain per-TXG delta */
537 osd_zfs_acct_id(env, osd->od_acct_delta->zat_grp, gid, delta, oh);
541 * Space Accounting Management
545 * Return space usage consumed by a given uid or gid.
546 * Block usage is accurate since it is maintained by DMU itself.
547 * However, DMU does not provide inode accounting, so the #inodes in use
548 * is estimated from the block usage and statfs information.
550 * \param env - is the environment passed by the caller
551 * \param dtobj - is the accounting object
552 * \param dtrec - is the record to fill with space usage information
553 * \param dtkey - is the id of the user or group for which we would
554 * like to access disk usage.
555 * \param capa - is the capability, not used.
557 * \retval +ve - success : exact match
558 * \retval -ve - failure
560 static int osd_acct_index_lookup(const struct lu_env *env,
561 struct dt_object *dtobj,
562 struct dt_rec *dtrec,
563 const struct dt_key *dtkey,
564 struct lustre_capa *capa)
566 struct osd_thread_info *info = osd_oti_get(env);
567 char *buf = info->oti_buf;
568 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
569 struct osd_object *obj = osd_dt_obj(dtobj);
570 struct osd_device *osd = osd_obj2dev(obj);
572 struct zfs_id_change *za = NULL;
576 rec->bspace = rec->ispace = 0;
578 /* convert the 64-bit uid/gid into a string */
579 sprintf(buf, "%llx", *((__u64 *)dtkey));
580 /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be
582 oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu));
584 /* disk usage (in bytes) is maintained by DMU.
585 * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
586 * not associated with any dmu_buf_t (see dnode_special_open()).
587 * As a consequence, we cannot use udmu_zap_lookup() here since it
588 * requires a valid oo_db. */
589 rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1,
592 /* user/group has not created anything yet */
593 CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n",
594 osd->od_svname, buf);
/* optional mode: estimate inode count from block usage instead of ZAP */
598 if (osd->od_quota_iused_est) {
599 if (rec->bspace != 0)
600 /* estimate #inodes in use */
601 rec->ispace = udmu_objset_user_iused(&osd->od_objset,
606 /* as for inode accounting, it is not maintained by DMU, so we just
607 * use our own ZAP to track inode usage */
/* prefer the in-memory cache; fall back to the on-disk ZAP below */
608 if (oid == DMU_USERUSED_OBJECT) {
609 za = osd_zfs_lookup_by_id(osd->od_acct_usr,
611 } else if (oid == DMU_GROUPUSED_OBJECT) {
612 za = osd_zfs_lookup_by_id(osd->od_acct_grp,
616 rec->ispace = atomic_read(&za->zic_num);
618 rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object,
619 buf, sizeof(uint64_t), 1, &rec->ispace);
623 /* user/group has not created any file yet */
624 CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
625 osd->od_svname, buf);
633 * Initialize osd Iterator for given osd index object.
635 * \param dt - osd index object
636 * \param attr - not used
637 * \param capa - BYPASS_CAPA
/* Returns an iterator over the accounting ZAP (user or group, chosen from
 * the object's FID), or an ERR_PTR on failure. */
639 static struct dt_it *osd_it_acct_init(const struct lu_env *env,
640 struct dt_object *dt,
642 struct lustre_capa *capa)
644 struct osd_thread_info *info = osd_oti_get(env);
645 struct osd_it_quota *it;
646 struct lu_object *lo = &dt->do_lu;
647 struct osd_device *osd = osd_dev(lo->lo_dev);
651 LASSERT(lu_object_exists(lo));
654 RETURN(ERR_PTR(-ENOMEM));
/* iterator lives in per-thread info: one active iterator per thread */
656 it = &info->oti_it_quota;
657 memset(it, 0, sizeof(*it));
658 it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo));
/* pick the matching in-memory cache for ispace lookups */
660 if (it->oiq_oid == DMU_GROUPUSED_OBJECT)
661 it->oiq_hash = osd->od_acct_grp;
662 else if (it->oiq_oid == DMU_USERUSED_OBJECT)
663 it->oiq_hash = osd->od_acct_usr;
667 /* initialize zap cursor */
668 rc = -udmu_zap_cursor_init(&it->oiq_zc, &osd->od_objset, it->oiq_oid,0);
672 /* take object reference */
674 it->oiq_obj = osd_dt_obj(dt);
677 RETURN((struct dt_it *)it);
681 * Free given iterator.
683 * \param di - osd iterator
685 static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
687 struct osd_it_quota *it = (struct osd_it_quota *)di;
/* release the ZAP cursor, then the object reference taken at init */
689 udmu_zap_cursor_fini(it->oiq_zc);
690 lu_object_put(env, &it->oiq_obj->oo_dt.do_lu);
695 * Move on to the next valid entry.
697 * \param di - osd iterator
699 * \retval +ve - iterator reached the end
700 * \retval 0 - iterator has not reached the end yet
701 * \retval -ve - unexpected failure
703 static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
705 struct osd_it_quota *it = (struct osd_it_quota *)di;
/* after a reset the cursor is already on the first entry: don't advance */
709 if (it->oiq_reset == 0)
710 zap_cursor_advance(it->oiq_zc);
/* probe the current position; key itself is not needed here (NULL buf) */
712 rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
713 if (rc == -ENOENT) /* reached the end */
719 * Return pointer to the key under iterator.
721 * \param di - osd iterator
723 static struct dt_key *osd_it_acct_key(const struct lu_env *env,
724 const struct dt_it *di)
726 struct osd_it_quota *it = (struct osd_it_quota *)di;
727 struct osd_thread_info *info = osd_oti_get(env);
728 char *buf = info->oti_buf;
/* ZAP keys are hex strings; decode back into the numeric id */
734 rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
737 it->oiq_id = simple_strtoull(buf, &p, 16);
738 RETURN((struct dt_key *) &it->oiq_id);
742 * Return size of key under iterator (in bytes)
744 * \param di - osd iterator
746 static int osd_it_acct_key_size(const struct lu_env *env,
747 const struct dt_it *di)
/* keys are always 64-bit uid/gid values */
750 RETURN((int)sizeof(uint64_t));
754 * Return pointer to the record under iterator.
756 * \param di - osd iterator
757 * \param attr - not used
/* Fills @dtrec with block usage (from the DMU cursor) and inode usage (from
 * the in-memory cache, falling back to our own ZAP, or estimated). */
759 static int osd_it_acct_rec(const struct lu_env *env,
760 const struct dt_it *di,
761 struct dt_rec *dtrec, __u32 attr)
763 struct osd_thread_info *info = osd_oti_get(env);
764 char *buf = info->oti_buf;
765 struct osd_it_quota *it = (struct osd_it_quota *)di;
766 struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
767 struct osd_object *obj = it->oiq_obj;
768 struct osd_device *osd = osd_obj2dev(obj);
770 struct zfs_id_change *za;
775 rec->ispace = rec->bspace = 0;
777 /* retrieve block usage from the DMU accounting object */
778 rc = -udmu_zap_cursor_retrieve_value(env, it->oiq_zc,
779 (char *)&rec->bspace,
780 sizeof(uint64_t), &bytes_read);
/* optional mode: estimate inode count instead of consulting the ZAP */
784 if (osd->od_quota_iused_est) {
785 if (rec->bspace != 0)
786 /* estimate #inodes in use */
787 rec->ispace = udmu_objset_user_iused(&osd->od_objset,
792 /* retrieve key associated with the current cursor */
793 rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
797 /* inode accounting is not maintained by DMU, so we use our own ZAP to
798 * track inode usage */
799 za = osd_zfs_lookup_by_id(it->oiq_hash, it->oiq_id);
801 /* found in the cache */
802 rec->ispace = atomic_read(&za->zic_num);
804 rc = -zap_lookup(osd->od_objset.os,
805 it->oiq_obj->oo_db->db_object,
806 buf, sizeof(uint64_t), 1, &rec->ispace);
808 /* user/group has not created any file yet */
809 CDEBUG(D_QUOTA, "%s: id %s not found in ZAP\n",
810 osd->od_svname, buf);
819 * Returns cookie for current Iterator position.
821 * \param di - osd iterator
823 static __u64 osd_it_acct_store(const struct lu_env *env,
824 const struct dt_it *di)
826 struct osd_it_quota *it = (struct osd_it_quota *)di;
/* the ZAP cursor serialization value serves as the resume cookie */
829 RETURN(udmu_zap_cursor_serialize(it->oiq_zc));
833 * Restore iterator from cookie. if the \a hash isn't found,
834 * restore the first valid record.
836 * \param di - osd iterator
837 * \param hash - iterator location cookie
839 * \retval +ve - di points to exact matched key
840 * \retval 0 - di points to the first valid record
841 * \retval -ve - failure
843 static int osd_it_acct_load(const struct lu_env *env,
844 const struct dt_it *di, __u64 hash)
846 struct osd_it_quota *it = (struct osd_it_quota *)di;
847 struct osd_device *osd = osd_obj2dev(it->oiq_obj);
852 /* create new cursor pointing to the new hash */
853 rc = -udmu_zap_cursor_init(&zc, &osd->od_objset, it->oiq_oid, hash);
/* replace the old cursor only after the new one was created */
856 udmu_zap_cursor_fini(it->oiq_zc);
/* probe the restored position; -ENOENT means cookie past the end */
860 rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
863 else if (rc == -ENOENT)
869 * Move Iterator to record specified by \a key, if the \a key isn't found,
870 * move to the first valid record.
872 * \param di - osd iterator
873 * \param key - uid or gid
875 * \retval +ve - di points to exact matched key
876 * \retval 0 - di points to the first valid record
877 * \retval -ve - failure
879 static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
880 const struct dt_key *key)
884 /* XXX: like osd_zap_it_get(), API is currently broken */
885 LASSERT(*((__u64 *)key) == 0);
/* delegate to load() with cookie 0, i.e. the start of the ZAP */
887 RETURN(osd_it_acct_load(env, di, 0));
893 * \param di - osd iterator
/* Release the iterator position: nothing to do for accounting iterators. */
895 static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
900 * Index and Iterator operations for accounting objects
902 const struct dt_index_operations osd_acct_index_ops = {
903 .dio_lookup = osd_acct_index_lookup,
905 .init = osd_it_acct_init,
906 .fini = osd_it_acct_fini,
907 .get = osd_it_acct_get,
908 .put = osd_it_acct_put,
909 .next = osd_it_acct_next,
910 .key = osd_it_acct_key,
911 .key_size = osd_it_acct_key_size,
912 .rec = osd_it_acct_rec,
913 .store = osd_it_acct_store,
914 .load = osd_it_acct_load
919 * Quota Enforcement Management
923 * Wrapper for qsd_op_begin().
925 * \param env - the environment passed by the caller
926 * \param osd - is the osd_device
927 * \param uid - user id of the inode
928 * \param gid - group id of the inode
929 * \param space - how many blocks/inodes will be consumed/released
930 * \param oh - osd transaction handle
931 * \param is_blk - block quota or inode quota?
932 * \param flags - if the operation is write, return no user quota, no
933 * group quota, or sync commit flags to the caller
934 * \param force - set to 1 when changes are performed by root user and thus
935 * can't failed with EDQUOT
937 * \retval 0 - success
938 * \retval -ve - failure
940 int osd_declare_quota(const struct lu_env *env, struct osd_device *osd,
941 qid_t uid, qid_t gid, long long space,
942 struct osd_thandle *oh, bool is_blk, int *flags,
945 struct osd_thread_info *info = osd_oti_get(env);
946 struct lquota_id_info *qi = &info->oti_qi;
947 struct qsd_instance *qsd = osd->od_quota_slave;
948 int rcu, rcg; /* user & group rc */
951 if (unlikely(qsd == NULL))
952 /* quota slave instance hasn't been allocated yet */
955 /* let's start with user quota */
956 qi->lqi_id.qid_uid = uid;
957 qi->lqi_type = USRQUOTA;
958 qi->lqi_space = space;
959 qi->lqi_is_blk = is_blk;
960 rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
962 if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
963 /* ignore EDQUOT & EINPROGRESS when changes are done by root */
966 /* For non-fatal error, we want to continue to get the noquota flags
967 * for group id. This is only for commit write, which has @flags passed
968 * in. See osd_declare_write_commit().
969 * When force is set to true, we also want to proceed with the gid */
970 if (rcu && (rcu != -EDQUOT || flags == NULL))
973 /* and now group quota */
/* lqi_space/lqi_is_blk keep the values set for the user check above */
974 qi->lqi_id.qid_gid = gid;
975 qi->lqi_type = GRPQUOTA;
976 rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
978 if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
979 /* as before, ignore EDQUOT & EINPROGRESS for root */
/* user failure takes precedence over group failure */
982 RETURN(rcu ? rcu : rcg);