* GPL HEADER END
*/
/*
- * Copyright (c) 2012, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
* Use is subject to license terms.
*
* Author: Johann Lombardi <johann@whamcloud.com>
#include "osd_internal.h"
/**
- * Helper function to retrieve DMU object id from fid for accounting object
- */
-uint64_t osd_quota_fid2dmu(const struct lu_fid *fid)
-{
- LASSERT(fid_is_acct(fid));
- if (fid_oid(fid) == ACCT_GROUP_OID)
- return DMU_GROUPUSED_OBJECT;
- return DMU_USERUSED_OBJECT;
-}
-
-/**
- * Helper function to estimate the number of inodes in use for a give uid/gid
- * from the block usage
+ * Helper function to estimate the number of inodes in use for the given
+ * uid/gid/projid from the block usage
*/
static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes)
{
uint64_t refdbytes, availbytes, usedobjs, availobjs;
- uint64_t uidobjs;
+ uint64_t uidobjs, bshift;
/* get fresh statfs info */
dmu_objset_space(osd->od_os, &refdbytes, &availbytes,
&usedobjs, &availobjs);
/* estimate the number of objects based on the disk usage */
+ bshift = fls64(osd->od_max_blksz) - 1;
uidobjs = osd_objs_count_estimate(refdbytes, usedobjs,
- uidbytes >> SPA_MAXBLOCKSHIFT);
+ uidbytes >> bshift, bshift);
if (uidbytes > 0)
/* if we have at least 1 byte, we have at least one dnode ... */
uidobjs = max_t(uint64_t, uidobjs, 1);
*/
/**
- * Return space usage consumed by a given uid or gid.
+ * Return space usage consumed by a given uid or gid or projid.
* Block usage is accurate since it is maintained by DMU itself.
* However, DMU does not provide inode accounting, so the #inodes in use
* is estimated from the block usage and statfs information.
* \param dtrec - is the record to fill with space usage information
* \param dtkey - is the id of the user or group for which we would
* like to access disk usage.
- * \param capa - is the capability, not used.
*
* \retval +ve - success : exact match
* \retval -ve - failure
static int osd_acct_index_lookup(const struct lu_env *env,
struct dt_object *dtobj,
struct dt_rec *dtrec,
- const struct dt_key *dtkey,
- struct lustre_capa *capa)
+ const struct dt_key *dtkey)
{
- struct osd_thread_info *info = osd_oti_get(env);
- char *buf = info->oti_buf;
- struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
- struct osd_object *obj = osd_dt_obj(dtobj);
- struct osd_device *osd = osd_obj2dev(obj);
- int rc;
- uint64_t oid;
+ struct osd_thread_info *info = osd_oti_get(env);
+ char *buf = info->oti_buf;
+ struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
+ struct osd_object *obj = osd_dt_obj(dtobj);
+ struct osd_device *osd = osd_obj2dev(obj);
+ dnode_t *dn = obj->oo_dn;
+ size_t buflen = sizeof(info->oti_buf);
+ int rc;
ENTRY;
rec->bspace = rec->ispace = 0;
- /* convert the 64-bit uid/gid into a string */
- sprintf(buf, "%llx", *((__u64 *)dtkey));
- /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be
- * used */
- oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu));
+ /* convert the 64-bit uid/gid/projid into a string */
+ snprintf(buf, buflen, "%llx", *((__u64 *)dtkey));
+ if (unlikely(!dn)) {
+ CDEBUG(D_QUOTA, "%s: miss accounting obj for %s\n",
+ osd->od_svname, buf);
+
+ RETURN(-ENOENT);
+ }
/* disk usage (in bytes) is maintained by DMU.
* DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
- * not associated with any dmu_but_t (see dnode_special_open()).
- * As a consequence, we cannot use udmu_zap_lookup() here since it
- * requires a valid oo_db. */
- rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1,
- &rec->bspace);
- if (rc == -ENOENT)
- /* user/group has not created anything yet */
+ * are not associated with any dmu_buf_t (see dnode_special_open()). */
+ rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1,
+ &rec->bspace);
+ if (rc == -ENOENT) {
+ /* user/group/project has not created anything yet */
CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n",
osd->od_svname, buf);
- else if (rc)
+ /* -ENOENT is the normal case; convert it to 1. */
+ rc = 1;
+ } else if (rc) {
RETURN(rc);
+ }
- if (osd->od_quota_iused_est) {
+ if (!osd_dmu_userobj_accounting_available(osd)) {
if (rec->bspace != 0)
/* estimate #inodes in use */
rec->ispace = osd_objset_user_iused(osd, rec->bspace);
- RETURN(+1);
+ rc = 1;
+ } else {
+ snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx",
+ *((__u64 *)dtkey));
+ rc = osd_zap_lookup(osd, dn->dn_object, dn, buf,
+ sizeof(uint64_t), 1, &rec->ispace);
+ if (rc == -ENOENT) {
+ CDEBUG(D_QUOTA,
+ "%s: id %s not found dnode accounting\n",
+ osd->od_svname, buf);
+ /* -ENOENT is the normal case; convert it to 1. */
+ rc = 1;
+ } else if (rc == 0) {
+ rc = 1;
+ }
}
- /* as for inode accounting, it is not maintained by DMU, so we just
- * use our own ZAP to track inode usage */
- rc = -zap_lookup(osd->od_os, obj->oo_db->db_object,
- buf, sizeof(uint64_t), 1, &rec->ispace);
- if (rc == -ENOENT)
- /* user/group has not created any file yet */
- CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
- osd->od_svname, buf);
- else if (rc)
- RETURN(rc);
-
- RETURN(+1);
+ RETURN(rc);
}
/**
*
* \param dt - osd index object
* \param attr - not used
- * \param capa - BYPASS_CAPA
*/
static struct dt_it *osd_it_acct_init(const struct lu_env *env,
struct dt_object *dt,
- __u32 attr,
- struct lustre_capa *capa)
+ __u32 attr)
{
- struct osd_thread_info *info = osd_oti_get(env);
- struct osd_it_quota *it;
- struct lu_object *lo = &dt->do_lu;
- struct osd_device *osd = osd_dev(lo->lo_dev);
- int rc;
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct osd_it_quota *it;
+ struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *osd = osd_obj2dev(obj);
+ dnode_t *dn = obj->oo_dn;
+ int rc;
ENTRY;
- LASSERT(lu_object_exists(lo));
+ if (unlikely(!dn)) {
+ CDEBUG(D_QUOTA, "%s: Not found in DMU accounting ZAP\n",
+ osd->od_svname);
+
+ RETURN(ERR_PTR(-ENOENT));
+ }
if (info == NULL)
RETURN(ERR_PTR(-ENOMEM));
- if (info->oti_it_inline) {
- OBD_ALLOC_PTR(it);
- if (it == NULL)
- RETURN(ERR_PTR(-ENOMEM));
- } else {
- it = &info->oti_it_quota;
- info->oti_it_inline = 1;
- }
+ OBD_ALLOC_PTR(it);
+ if (it == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
memset(it, 0, sizeof(*it));
- it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo));
+ it->oiq_oid = dn->dn_object;
/* initialize zap cursor */
rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0);
if (rc != 0) {
- if (it != &info->oti_it_quota)
- OBD_FREE_PTR(it);
- else
- info->oti_it_inline = 0;
-
+ OBD_FREE_PTR(it);
RETURN(ERR_PTR(rc));
}
/* take object reference */
- lu_object_get(lo);
+ lu_object_get(&dt->do_lu);
it->oiq_obj = osd_dt_obj(dt);
it->oiq_reset = 1;
*/
static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
{
- struct osd_thread_info *info = osd_oti_get(env);
struct osd_it_quota *it = (struct osd_it_quota *)di;
ENTRY;
osd_zap_cursor_fini(it->oiq_zc);
- lu_object_put(env, &it->oiq_obj->oo_dt.do_lu);
- if (it != &info->oti_it_quota)
- OBD_FREE_PTR(it);
- else
- info->oti_it_inline = 0;
+ osd_object_put(env, it->oiq_obj);
+ OBD_FREE_PTR(it);
EXIT;
}
/**
+ * Locate the first entry that is for space accounting.
+ */
+static int osd_zap_locate(struct osd_it_quota *it, zap_attribute_t *za)
+{
+ int rc;
+ ENTRY;
+
+ while (1) {
+ rc = -zap_cursor_retrieve(it->oiq_zc, za);
+ if (rc)
+ break;
+
+ if (strncmp(za->za_name, OSD_DMU_USEROBJ_PREFIX,
+ OSD_DMU_USEROBJ_PREFIX_LEN))
+ break;
+
+ zap_cursor_advance(it->oiq_zc);
+ }
+
+ RETURN(rc);
+}
+
+/**
* Move on to the next valid entry.
*
* \param di - osd iterator
if (it->oiq_reset == 0)
zap_cursor_advance(it->oiq_zc);
it->oiq_reset = 0;
- rc = -zap_cursor_retrieve(it->oiq_zc, za);
- if (rc == -ENOENT) /* reached the end */
- rc = 1;
- RETURN(rc);
+
+ rc = osd_zap_locate(it, za);
+ RETURN(rc == -ENOENT ? 1 : rc);
}
/**
ENTRY;
it->oiq_reset = 0;
- rc = -zap_cursor_retrieve(it->oiq_zc, za);
+ rc = osd_zap_locate(it, za);
if (rc)
RETURN(ERR_PTR(rc));
+
rc = kstrtoull(za->za_name, 16, &it->oiq_id);
+ if (rc)
+ CERROR("couldn't parse name %s\n", za->za_name);
RETURN((struct dt_key *) &it->oiq_id);
}
* to read bytes we need to call zap_lookup explicitly.
*/
static int osd_zap_cursor_retrieve_value(const struct lu_env *env,
- zap_cursor_t *zc, char *buf,
- int buf_size, int *bytes_read)
+ struct osd_it_quota *it,
+ char *buf, int buf_size,
+ int *bytes_read)
{
+ const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
zap_attribute_t *za = &osd_oti_get(env)->oti_za;
+ zap_cursor_t *zc = it->oiq_zc;
+ struct osd_device *osd = osd_obj2dev(it->oiq_obj);
int rc, actual_size;
rc = -zap_cursor_retrieve(zc, za);
if (unlikely(rc != 0))
- return -rc;
+ return rc;
if (unlikely(za->za_integer_length <= 0))
return -ERANGE;
buf_size = za->za_num_integers;
}
- rc = -zap_lookup(zc->zc_objset, zc->zc_zapobj,
- za->za_name, za->za_integer_length,
- buf_size, buf);
-
+ /* use correct special ID to request bytes used */
+ rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ?
+ DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL,
+ za->za_name, za->za_integer_length, buf_size, buf);
if (likely(rc == 0))
*bytes_read = actual_size;
rec->ispace = rec->bspace = 0;
/* retrieve block usage from the DMU accounting object */
- rc = osd_zap_cursor_retrieve_value(env, it->oiq_zc,
- (char *)&rec->bspace,
+ rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace,
sizeof(uint64_t), &bytes_read);
if (rc)
RETURN(rc);
- if (osd->od_quota_iused_est) {
+ if (!osd_dmu_userobj_accounting_available(osd)) {
if (rec->bspace != 0)
/* estimate #inodes in use */
rec->ispace = osd_objset_user_iused(osd, rec->bspace);
if (unlikely(rc != 0))
RETURN(rc);
- /* inode accounting is not maintained by DMU, so we use our own ZAP to
- * track inode usage */
- rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_db->db_object,
- za->za_name, sizeof(uint64_t), 1, &rec->ispace);
+ /* inode accounting is maintained by DMU since 0.7.0 */
+ strncpy(info->oti_buf, OSD_DMU_USEROBJ_PREFIX,
+ OSD_DMU_USEROBJ_PREFIX_LEN);
+ strlcpy(info->oti_buf + OSD_DMU_USEROBJ_PREFIX_LEN, za->za_name,
+ sizeof(info->oti_buf) - OSD_DMU_USEROBJ_PREFIX_LEN);
+ rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object,
+ it->oiq_obj->oo_dn, info->oti_buf, sizeof(uint64_t),
+ 1, &rec->ispace);
if (rc == -ENOENT)
/* user/group has not created any file yet */
CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
- osd->od_svname, za->za_name);
+ osd->od_svname, info->oti_buf);
else if (rc)
RETURN(rc);
it->oiq_zc = zc;
it->oiq_reset = 0;
- rc = -zap_cursor_retrieve(it->oiq_zc, za);
+ rc = osd_zap_locate(it, za);
if (rc == 0)
rc = 1;
else if (rc == -ENOENT)
rc = 0;
-
RETURN(rc);
}
* move to the first valid record.
*
* \param di - osd iterator
- * \param key - uid or gid
+ * \param key - uid or gid or projid
*
* \retval +ve - di points to exact matched key
* \retval 0 - di points to the first valid record
* \param osd - is the osd_device
* \param uid - user id of the inode
* \param gid - group id of the inode
+ * \param projid - project id of the inode
* \param space - how many blocks/inodes will be consumed/released
* \param oh - osd transaction handle
- * \param is_blk - block quota or inode quota?
* \param flags - if the operation is write, return no user quota, no
* group quota, or sync commit flags to the caller
- * \param force - set to 1 when changes are performed by root user and thus
- * can't failed with EDQUOT
+ * \param osd_qid_declare_flags - indicates whether this is inode or block
+ * accounting and whether changes are performed by the root user
*
* \retval 0 - success
* \retval -ve - failure
*/
int osd_declare_quota(const struct lu_env *env, struct osd_device *osd,
- qid_t uid, qid_t gid, long long space,
- struct osd_thandle *oh, bool is_blk, int *flags,
- bool force)
+ qid_t uid, qid_t gid, qid_t projid, long long space,
+ struct osd_thandle *oh, int *flags,
+ enum osd_qid_declare_flags osd_qid_declare_flags)
{
- struct osd_thread_info *info = osd_oti_get(env);
- struct lquota_id_info *qi = &info->oti_qi;
- struct qsd_instance *qsd = osd->od_quota_slave;
- int rcu, rcg; /* user & group rc */
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct lquota_id_info *qi = &info->oti_qi;
+ struct qsd_instance *qsd = NULL;
+ int rcu, rcg, rcp = 0; /* user & group & project rc */
+ struct thandle *th = &oh->ot_super;
+ bool force = !!(osd_qid_declare_flags & OSD_QID_FORCE) ||
+ th->th_ignore_quota;
ENTRY;
+ if (osd_qid_declare_flags & OSD_QID_INODE)
+ qsd = osd->od_quota_slave_md;
+ else if (osd_qid_declare_flags & OSD_QID_BLK)
+ qsd = osd->od_quota_slave_dt;
+ else
+ RETURN(0);
+
if (unlikely(qsd == NULL))
/* quota slave instance hasn't been allocated yet */
RETURN(0);
qi->lqi_id.qid_uid = uid;
qi->lqi_type = USRQUOTA;
qi->lqi_space = space;
- qi->lqi_is_blk = is_blk;
+ qi->lqi_is_blk = !!(osd_qid_declare_flags & OSD_QID_BLK);
rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
-
if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
/* ignore EDQUOT & EINPROGRESS when changes are done by root */
rcu = 0;
qi->lqi_id.qid_gid = gid;
qi->lqi_type = GRPQUOTA;
rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
-
if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
/* as before, ignore EDQUOT & EINPROGRESS for root */
rcg = 0;
- RETURN(rcu ? rcu : rcg);
+#ifdef ZFS_PROJINHERIT
+ if (rcg && (rcg != -EDQUOT || flags == NULL))
+ RETURN(rcg);
+
+ /* for project quota */
+ if (osd->od_projectused_dn) {
+ qi->lqi_id.qid_projid = projid;
+ qi->lqi_type = PRJQUOTA;
+ rcp = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
+ if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS))
+ rcp = 0;
+ }
+#endif
+
+ RETURN(rcu ? rcu : (rcg ? rcg : rcp));
}