* GPL HEADER END
*/
/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ * Copyright (c) 2012, 2017, Intel Corporation.
* Use is subject to license terms.
*
* Author: Johann Lombardi <johann@whamcloud.com>
#include <lustre_quota.h>
#include <obd.h>
-#include "udmu.h"
#include "osd_internal.h"
/**
- * Helper function to retrieve DMU object id from fid for accounting object
+ * Helper function to estimate the number of inodes in use for the given
+ * uid/gid/projid from the block usage
*/
-uint64_t osd_quota_fid2dmu(const struct lu_fid *fid)
+static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes)
{
- LASSERT(fid_is_acct(fid));
- if (fid_oid(fid) == ACCT_GROUP_OID)
- return DMU_GROUPUSED_OBJECT;
- return DMU_USERUSED_OBJECT;
+ uint64_t refdbytes, availbytes, usedobjs, availobjs;
+ uint64_t uidobjs, bshift;
+
+ /* get fresh statfs info */
+ dmu_objset_space(osd->od_os, &refdbytes, &availbytes,
+ &usedobjs, &availobjs);
+
+ /* estimate the number of objects based on the disk usage */
+ bshift = fls64(osd->od_max_blksz) - 1;
+ uidobjs = osd_objs_count_estimate(refdbytes, usedobjs,
+ uidbytes >> bshift, bshift);
+ if (uidbytes > 0)
+ /* if we have at least 1 byte, we have at least one dnode ... */
+ uidobjs = max_t(uint64_t, uidobjs, 1);
+
+ return uidobjs;
}
/**
*/
/**
- * Return space usage consumed by a given uid or gid.
+ * Return space usage consumed by a given uid or gid or projid.
* Block usage is accurrate since it is maintained by DMU itself.
* However, DMU does not provide inode accounting, so the #inodes in use
* is estimated from the block usage and statfs information.
* \param dtrec - is the record to fill with space usage information
* \param dtkey - is the id the of the user or group for which we would
* like to access disk usage.
- * \param capa - is the capability, not used.
*
* \retval +ve - success : exact match
* \retval -ve - failure
static int osd_acct_index_lookup(const struct lu_env *env,
struct dt_object *dtobj,
struct dt_rec *dtrec,
- const struct dt_key *dtkey,
- struct lustre_capa *capa)
+ const struct dt_key *dtkey)
{
- struct osd_thread_info *info = osd_oti_get(env);
- char *buf = info->oti_buf;
- struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
- struct osd_object *obj = osd_dt_obj(dtobj);
- struct osd_device *osd = osd_obj2dev(obj);
- int rc;
- uint64_t oid;
+ struct osd_thread_info *info = osd_oti_get(env);
+ char *buf = info->oti_buf;
+ struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
+ struct osd_object *obj = osd_dt_obj(dtobj);
+ struct osd_device *osd = osd_obj2dev(obj);
+ dnode_t *dn = obj->oo_dn;
+ size_t buflen = sizeof(info->oti_buf);
+ int rc;
ENTRY;
rec->bspace = rec->ispace = 0;
- /* convert the 64-bit uid/gid into a string */
- sprintf(buf, "%llx", *((__u64 *)dtkey));
- /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be
- * used */
- oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu));
+ /* convert the 64-bit uid/gid/projid into a string */
+ snprintf(buf, buflen, "%llx", *((__u64 *)dtkey));
+ if (unlikely(!dn)) {
+ CDEBUG(D_QUOTA, "%s: miss accounting obj for %s\n",
+ osd->od_svname, buf);
+
+ RETURN(-ENOENT);
+ }
/* disk usage (in bytes) is maintained by DMU.
* DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
- * not associated with any dmu_but_t (see dnode_special_open()).
- * As a consequence, we cannot use udmu_zap_lookup() here since it
- * requires a valid oo_db. */
- rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1,
- &rec->bspace);
- if (rc == -ENOENT)
- /* user/group has not created anything yet */
+ * are not associated with any dmu_buf_t (see dnode_special_open()). */
+ rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1,
+ &rec->bspace);
+ if (rc == -ENOENT) {
+ /* user/group/project has not created anything yet */
CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n",
osd->od_svname, buf);
- else if (rc)
+ /* -ENOENT is the normal case here; convert it to 1. */
+ rc = 1;
+ } else if (rc) {
RETURN(rc);
+ }
- if (osd->od_quota_iused_est) {
+ if (!osd_dmu_userobj_accounting_available(osd)) {
if (rec->bspace != 0)
/* estimate #inodes in use */
- rec->ispace = udmu_objset_user_iused(&osd->od_objset,
- rec->bspace);
- RETURN(+1);
+ rec->ispace = osd_objset_user_iused(osd, rec->bspace);
+ rc = 1;
+ } else {
+ snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx",
+ *((__u64 *)dtkey));
+ rc = osd_zap_lookup(osd, dn->dn_object, dn, buf,
+ sizeof(uint64_t), 1, &rec->ispace);
+ if (rc == -ENOENT) {
+ CDEBUG(D_QUOTA,
+ "%s: id %s not found dnode accounting\n",
+ osd->od_svname, buf);
+ /* -ENOENT is the normal case here; convert it to 1. */
+ rc = 1;
+ } else if (rc == 0) {
+ rc = 1;
+ }
}
- /* as for inode accounting, it is not maintained by DMU, so we just
- * use our own ZAP to track inode usage */
- rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object,
- buf, sizeof(uint64_t), 1, &rec->ispace);
- if (rc == -ENOENT)
- /* user/group has not created any file yet */
- CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
- osd->od_svname, buf);
- else if (rc)
- RETURN(rc);
-
- RETURN(+1);
+ RETURN(rc);
}
/**
*
* \param dt - osd index object
* \param attr - not used
- * \param capa - BYPASS_CAPA
*/
static struct dt_it *osd_it_acct_init(const struct lu_env *env,
struct dt_object *dt,
- __u32 attr,
- struct lustre_capa *capa)
+ __u32 attr)
{
- struct osd_thread_info *info = osd_oti_get(env);
- struct osd_it_quota *it;
- struct lu_object *lo = &dt->do_lu;
- struct osd_device *osd = osd_dev(lo->lo_dev);
- int rc;
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct osd_it_quota *it;
+ struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *osd = osd_obj2dev(obj);
+ dnode_t *dn = obj->oo_dn;
+ int rc;
ENTRY;
- LASSERT(lu_object_exists(lo));
+ if (unlikely(!dn)) {
+ CDEBUG(D_QUOTA, "%s: Not found in DMU accounting ZAP\n",
+ osd->od_svname);
+
+ RETURN(ERR_PTR(-ENOENT));
+ }
if (info == NULL)
RETURN(ERR_PTR(-ENOMEM));
- it = &info->oti_it_quota;
+ OBD_ALLOC_PTR(it);
+ if (it == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
memset(it, 0, sizeof(*it));
- it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo));
+ it->oiq_oid = dn->dn_object;
/* initialize zap cursor */
- rc = -udmu_zap_cursor_init(&it->oiq_zc, &osd->od_objset, it->oiq_oid,0);
- if (rc)
+ rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0);
+ if (rc != 0) {
+ OBD_FREE_PTR(it);
RETURN(ERR_PTR(rc));
+ }
/* take object reference */
- lu_object_get(lo);
+ lu_object_get(&dt->do_lu);
it->oiq_obj = osd_dt_obj(dt);
it->oiq_reset = 1;
*/
static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
{
- struct osd_it_quota *it = (struct osd_it_quota *)di;
+ struct osd_it_quota *it = (struct osd_it_quota *)di;
ENTRY;
- udmu_zap_cursor_fini(it->oiq_zc);
- lu_object_put(env, &it->oiq_obj->oo_dt.do_lu);
+
+ osd_zap_cursor_fini(it->oiq_zc);
+ osd_object_put(env, it->oiq_obj);
+ OBD_FREE_PTR(it);
+
EXIT;
}
/**
+ * Locate the first entry that is for space accounting, skipping every
+ * entry whose name starts with OSD_DMU_USEROBJ_PREFIX.
+ */
+static int osd_zap_locate(struct osd_it_quota *it, zap_attribute_t *za)
+{
+ int rc;
+ ENTRY;
+
+ while (1) {
+ rc = -zap_cursor_retrieve(it->oiq_zc, za);
+ if (rc)
+ break;
+
+ if (strncmp(za->za_name, OSD_DMU_USEROBJ_PREFIX,
+ OSD_DMU_USEROBJ_PREFIX_LEN))
+ break;
+
+ zap_cursor_advance(it->oiq_zc);
+ }
+
+ RETURN(rc);
+}
+
+/**
* Move on to the next valid entry.
*
* \param di - osd iterator
static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
{
struct osd_it_quota *it = (struct osd_it_quota *)di;
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
int rc;
ENTRY;
if (it->oiq_reset == 0)
zap_cursor_advance(it->oiq_zc);
it->oiq_reset = 0;
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
- if (rc == -ENOENT) /* reached the end */
- RETURN(+1);
- RETURN(rc);
+
+ rc = osd_zap_locate(it, za);
+ RETURN(rc == -ENOENT ? 1 : rc);
}
/**
const struct dt_it *di)
{
struct osd_it_quota *it = (struct osd_it_quota *)di;
- struct osd_thread_info *info = osd_oti_get(env);
- char *buf = info->oti_buf;
- char *p;
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
int rc;
ENTRY;
it->oiq_reset = 0;
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
+ rc = osd_zap_locate(it, za);
if (rc)
RETURN(ERR_PTR(rc));
- it->oiq_id = simple_strtoull(buf, &p, 16);
+
+ rc = kstrtoull(za->za_name, 16, &it->oiq_id);
+ if (rc)
+ CERROR("couldn't parse name %s\n", za->za_name);
+
RETURN((struct dt_key *) &it->oiq_id);
}
RETURN((int)sizeof(uint64_t));
}
+/*
+ * zap_cursor_retrieve() reads from the current record.
+ * To read the bytes we need to call zap_lookup explicitly.
+ */
+static int osd_zap_cursor_retrieve_value(const struct lu_env *env,
+ struct osd_it_quota *it,
+ char *buf, int buf_size,
+ int *bytes_read)
+{
+ const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
+ zap_cursor_t *zc = it->oiq_zc;
+ struct osd_device *osd = osd_obj2dev(it->oiq_obj);
+ int rc, actual_size;
+
+ rc = -zap_cursor_retrieve(zc, za);
+ if (unlikely(rc != 0))
+ return rc;
+
+ if (unlikely(za->za_integer_length <= 0))
+ return -ERANGE;
+
+ actual_size = za->za_integer_length * za->za_num_integers;
+
+ if (actual_size > buf_size) {
+ actual_size = buf_size;
+ buf_size = actual_size / za->za_integer_length;
+ } else {
+ buf_size = za->za_num_integers;
+ }
+
+ /* use correct special ID to request bytes used */
+ rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ?
+ DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL,
+ za->za_name, za->za_integer_length, buf_size, buf);
+ if (likely(rc == 0))
+ *bytes_read = actual_size;
+
+ return rc;
+}
+
/**
* Return pointer to the record under iterator.
*
struct dt_rec *dtrec, __u32 attr)
{
struct osd_thread_info *info = osd_oti_get(env);
- char *buf = info->oti_buf;
+ zap_attribute_t *za = &info->oti_za;
struct osd_it_quota *it = (struct osd_it_quota *)di;
struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
struct osd_object *obj = it->oiq_obj;
rec->ispace = rec->bspace = 0;
/* retrieve block usage from the DMU accounting object */
- rc = -udmu_zap_cursor_retrieve_value(env, it->oiq_zc,
- (char *)&rec->bspace,
- sizeof(uint64_t), &bytes_read);
+ rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace,
+ sizeof(uint64_t), &bytes_read);
if (rc)
RETURN(rc);
- if (osd->od_quota_iused_est) {
+ if (!osd_dmu_userobj_accounting_available(osd)) {
if (rec->bspace != 0)
/* estimate #inodes in use */
- rec->ispace = udmu_objset_user_iused(&osd->od_objset,
- rec->bspace);
+ rec->ispace = osd_objset_user_iused(osd, rec->bspace);
RETURN(0);
}
/* retrieve key associated with the current cursor */
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
- if (rc)
+ rc = -zap_cursor_retrieve(it->oiq_zc, za);
+ if (unlikely(rc != 0))
RETURN(rc);
- /* inode accounting is not maintained by DMU, so we use our own ZAP to
- * track inode usage */
- rc = -zap_lookup(osd->od_objset.os, it->oiq_obj->oo_db->db_object,
- buf, sizeof(uint64_t), 1, &rec->ispace);
+ /* inode accounting is maintained by DMU since 0.7.0 */
+ strncpy(info->oti_buf, OSD_DMU_USEROBJ_PREFIX,
+ OSD_DMU_USEROBJ_PREFIX_LEN);
+ strlcpy(info->oti_buf + OSD_DMU_USEROBJ_PREFIX_LEN, za->za_name,
+ sizeof(info->oti_buf) - OSD_DMU_USEROBJ_PREFIX_LEN);
+ rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object,
+ it->oiq_obj->oo_dn, info->oti_buf, sizeof(uint64_t),
+ 1, &rec->ispace);
if (rc == -ENOENT)
/* user/group has not created any file yet */
CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
- osd->od_svname, buf);
+ osd->od_svname, info->oti_buf);
else if (rc)
RETURN(rc);
struct osd_it_quota *it = (struct osd_it_quota *)di;
ENTRY;
it->oiq_reset = 0;
- RETURN(udmu_zap_cursor_serialize(it->oiq_zc));
+ RETURN(osd_zap_cursor_serialize(it->oiq_zc));
}
/**
{
struct osd_it_quota *it = (struct osd_it_quota *)di;
struct osd_device *osd = osd_obj2dev(it->oiq_obj);
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
zap_cursor_t *zc;
int rc;
ENTRY;
/* create new cursor pointing to the new hash */
- rc = -udmu_zap_cursor_init(&zc, &osd->od_objset, it->oiq_oid, hash);
+ rc = osd_zap_cursor_init(&zc, osd->od_os, it->oiq_oid, hash);
if (rc)
RETURN(rc);
- udmu_zap_cursor_fini(it->oiq_zc);
+ osd_zap_cursor_fini(it->oiq_zc);
it->oiq_zc = zc;
it->oiq_reset = 0;
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
+ rc = osd_zap_locate(it, za);
if (rc == 0)
- RETURN(+1);
+ rc = 1;
else if (rc == -ENOENT)
- RETURN(0);
+ rc = 0;
RETURN(rc);
}
* move to the first valid record.
*
* \param di - osd iterator
- * \param key - uid or gid
+ * \param key - uid or gid or projid
*
* \retval +ve - di points to exact matched key
* \retval 0 - di points to the first valid record
* \param osd - is the osd_device
* \param uid - user id of the inode
* \param gid - group id of the inode
+ * \param projid - project id of the inode
* \param space - how many blocks/inodes will be consumed/released
* \param oh - osd transaction handle
- * \param is_blk - block quota or inode quota?
* \param flags - if the operation is write, return no user quota, no
* group quota, or sync commit flags to the caller
- * \param force - set to 1 when changes are performed by root user and thus
- * can't failed with EDQUOT
+ * \param osd_qid_declare_flags - indicates whether this is inode or block
+ * accounting and whether changes are performed by the root user
*
* \retval 0 - success
* \retval -ve - failure
*/
int osd_declare_quota(const struct lu_env *env, struct osd_device *osd,
- qid_t uid, qid_t gid, long long space,
- struct osd_thandle *oh, bool is_blk, int *flags,
- bool force)
+ qid_t uid, qid_t gid, qid_t projid, long long space,
+ struct osd_thandle *oh,
+ enum osd_quota_local_flags *local_flags,
+ enum osd_qid_declare_flags osd_qid_declare_flags)
{
- struct osd_thread_info *info = osd_oti_get(env);
- struct lquota_id_info *qi = &info->oti_qi;
- struct qsd_instance *qsd = osd->od_quota_slave;
- int rcu, rcg; /* user & group rc */
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct lquota_id_info *qi = &info->oti_qi;
+ struct qsd_instance *qsd = NULL;
+ int rcu, rcg, rcp = 0; /* user & group & project rc */
+ struct thandle *th = &oh->ot_super;
+ bool force = !!(osd_qid_declare_flags & OSD_QID_FORCE) ||
+ th->th_ignore_quota;
ENTRY;
+ if (osd_qid_declare_flags & OSD_QID_INODE)
+ qsd = osd->od_quota_slave_md;
+ else if (osd_qid_declare_flags & OSD_QID_BLK)
+ qsd = osd->od_quota_slave_dt;
+ else
+ RETURN(0);
+
if (unlikely(qsd == NULL))
/* quota slave instance hasn't been allocated yet */
RETURN(0);
qi->lqi_id.qid_uid = uid;
qi->lqi_type = USRQUOTA;
qi->lqi_space = space;
- qi->lqi_is_blk = is_blk;
- rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
-
+ qi->lqi_is_blk = !!(osd_qid_declare_flags & OSD_QID_BLK);
+ rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, local_flags);
if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
/* ignore EDQUOT & EINPROGRESS when changes are done by root */
rcu = 0;
* for group id. This is only for commit write, which has @flags passed
* in. See osd_declare_write_commit().
* When force is set to true, we also want to proceed with the gid */
- if (rcu && (rcu != -EDQUOT || flags == NULL))
+ if (rcu && (rcu != -EDQUOT || local_flags == NULL))
RETURN(rcu);
/* and now group quota */
qi->lqi_id.qid_gid = gid;
qi->lqi_type = GRPQUOTA;
- rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags);
-
+ rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, local_flags);
if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
/* as before, ignore EDQUOT & EINPROGRESS for root */
rcg = 0;
- RETURN(rcu ? rcu : rcg);
+#ifdef ZFS_PROJINHERIT
+ if (rcg && (rcg != -EDQUOT || local_flags == NULL))
+ RETURN(rcg);
+
+ /* for project quota */
+ if (osd->od_projectused_dn) {
+ qi->lqi_id.qid_projid = projid;
+ qi->lqi_type = PRJQUOTA;
+ rcp = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi,
+ local_flags);
+ if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS))
+ rcp = 0;
+ }
+#endif
+
+ RETURN(rcu ? rcu : (rcg ? rcg : rcp));
}