X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_quota.c;h=2e75b8c9403f568477aac6d66c3653fd30751558;hb=d2e8208e22f21bb7354a9207f381217c222d3df3;hp=4ef1ad5f633b4e78289e666f0d26dd2c0eac6502;hpb=11afef00b6af407b8987076bd4f1ec9bc77eb75e;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c index 4ef1ad5..2e75b8c 100644 --- a/lustre/osd-zfs/osd_quota.c +++ b/lustre/osd-zfs/osd_quota.c @@ -21,30 +21,20 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, 2015, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. * Use is subject to license terms. * * Author: Johann Lombardi */ +#include #include #include #include "osd_internal.h" /** - * Helper function to retrieve DMU object id from fid for accounting object - */ -uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) -{ - LASSERT(fid_is_acct(fid)); - if (fid_oid(fid) == ACCT_GROUP_OID) - return DMU_GROUPUSED_OBJECT; - return DMU_USERUSED_OBJECT; -} - -/** - * Helper function to estimate the number of inodes in use for a give uid/gid - * from the block usage + * Helper function to estimate the number of inodes in use for the given + * uid/gid/projid from the block usage */ static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes) { @@ -71,7 +61,7 @@ static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes) */ /** - * Return space usage consumed by a given uid or gid. + * Return space usage consumed by a given uid or gid or projid. * Block usage is accurrate since it is maintained by DMU itself. * However, DMU does not provide inode accounting, so the #inodes in use * is estimated from the block usage and statfs information. @@ -90,33 +80,38 @@ static int osd_acct_index_lookup(const struct lu_env *env, struct dt_rec *dtrec, const struct dt_key *dtkey) { - struct osd_thread_info *info = osd_oti_get(env); - char *buf = info->oti_buf; - size_t buflen = sizeof(info->oti_buf); - struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; - struct osd_object *obj = osd_dt_obj(dtobj); - struct osd_device *osd = osd_obj2dev(obj); - int rc; - uint64_t oid; + struct osd_thread_info *info = osd_oti_get(env); + char *buf = info->oti_buf; + struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; + struct osd_object *obj = osd_dt_obj(dtobj); + struct osd_device *osd = osd_obj2dev(obj); + dnode_t *dn = obj->oo_dn; + size_t buflen = sizeof(info->oti_buf); + int rc; ENTRY; rec->bspace = rec->ispace = 0; - /* convert the 64-bit uid/gid into a string */ + /* convert the 64-bit uid/gid/projid into a string */ snprintf(buf, buflen, "%llx", *((__u64 *)dtkey)); - /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be - * used */ - oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu)); + if (unlikely(!dn)) { + CDEBUG(D_QUOTA, "%s: miss accounting obj for %s\n", + osd->od_svname, buf); + + RETURN(-ENOENT); + } /* disk usage (in bytes) is maintained by DMU. * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which * not associated with any dmu_but_t (see dnode_special_open()). */ - rc = zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1, - &rec->bspace); + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1, + &rec->bspace); if (rc == -ENOENT) { - /* user/group has not created anything yet */ + /* user/group/project has not created anything yet */ CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n", osd->od_svname, buf); + /* -ENOENT is normal case, convert it as 1. */ + rc = 1; } else if (rc) { RETURN(rc); } @@ -129,12 +124,14 @@ static int osd_acct_index_lookup(const struct lu_env *env, } else { snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx", *((__u64 *)dtkey)); - rc = zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1, - &rec->ispace); + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, + sizeof(uint64_t), 1, &rec->ispace); if (rc == -ENOENT) { CDEBUG(D_QUOTA, "%s: id %s not found dnode accounting\n", osd->od_svname, buf); + /* -ENOENT is normal case, convert it as 1. */ + rc = 1; } else if (rc == 0) { rc = 1; } @@ -153,14 +150,20 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, struct dt_object *dt, __u32 attr) { - struct osd_thread_info *info = osd_oti_get(env); - struct osd_it_quota *it; - struct lu_object *lo = &dt->do_lu; - struct osd_device *osd = osd_dev(lo->lo_dev); - int rc; + struct osd_thread_info *info = osd_oti_get(env); + struct osd_it_quota *it; + struct osd_object *obj = osd_dt_obj(dt); + struct osd_device *osd = osd_obj2dev(obj); + dnode_t *dn = obj->oo_dn; + int rc; ENTRY; - LASSERT(lu_object_exists(lo)); + if (unlikely(!dn)) { + CDEBUG(D_QUOTA, "%s: Not found in DMU accounting ZAP\n", + osd->od_svname); + + RETURN(ERR_PTR(-ENOENT)); + } if (info == NULL) RETURN(ERR_PTR(-ENOMEM)); @@ -170,7 +173,7 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, RETURN(ERR_PTR(-ENOMEM)); memset(it, 0, sizeof(*it)); - it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo)); + it->oiq_oid = dn->dn_object; /* initialize zap cursor */ rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0); @@ -180,7 +183,7 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, } /* take object reference */ - lu_object_get(lo); + lu_object_get(&dt->do_lu); it->oiq_obj = osd_dt_obj(dt); it->oiq_reset = 1; @@ -205,6 +208,29 @@ static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) } /** + * Locate the first entry that is for space accounting. + */ +static int osd_zap_locate(struct osd_it_quota *it, zap_attribute_t *za) +{ + int rc; + ENTRY; + + while (1) { + rc = -zap_cursor_retrieve(it->oiq_zc, za); + if (rc) + break; + + if (strncmp(za->za_name, OSD_DMU_USEROBJ_PREFIX, + OSD_DMU_USEROBJ_PREFIX_LEN)) + break; + + zap_cursor_advance(it->oiq_zc); + } + + RETURN(rc); +} + +/** * Move on to the next valid entry. * * \param di - osd iterator @@ -223,10 +249,9 @@ static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di) if (it->oiq_reset == 0) zap_cursor_advance(it->oiq_zc); it->oiq_reset = 0; - rc = -zap_cursor_retrieve(it->oiq_zc, za); - if (rc == -ENOENT) /* reached the end */ - rc = 1; - RETURN(rc); + + rc = osd_zap_locate(it, za); + RETURN(rc == -ENOENT ? 1 : rc); } /** @@ -243,10 +268,13 @@ static struct dt_key *osd_it_acct_key(const struct lu_env *env, ENTRY; it->oiq_reset = 0; - rc = -zap_cursor_retrieve(it->oiq_zc, za); + rc = osd_zap_locate(it, za); if (rc) RETURN(ERR_PTR(rc)); + rc = kstrtoull(za->za_name, 16, &it->oiq_id); + if (rc) + CERROR("couldn't parse name %s\n", za->za_name); RETURN((struct dt_key *) &it->oiq_id); } @@ -268,15 +296,19 @@ static int osd_it_acct_key_size(const struct lu_env *env, * to read bytes we need to call zap_lookup explicitly. */ static int osd_zap_cursor_retrieve_value(const struct lu_env *env, - zap_cursor_t *zc, char *buf, - int buf_size, int *bytes_read) + struct osd_it_quota *it, + char *buf, int buf_size, + int *bytes_read) { + const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu); zap_attribute_t *za = &osd_oti_get(env)->oti_za; + zap_cursor_t *zc = it->oiq_zc; + struct osd_device *osd = osd_obj2dev(it->oiq_obj); int rc, actual_size; rc = -zap_cursor_retrieve(zc, za); if (unlikely(rc != 0)) - return -rc; + return rc; if (unlikely(za->za_integer_length <= 0)) return -ERANGE; @@ -290,10 +322,10 @@ static int osd_zap_cursor_retrieve_value(const struct lu_env *env, buf_size = za->za_num_integers; } - rc = -zap_lookup(zc->zc_objset, zc->zc_zapobj, - za->za_name, za->za_integer_length, - buf_size, buf); - + /* use correct special ID to request bytes used */ + rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ? + DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL, + za->za_name, za->za_integer_length, buf_size, buf); if (likely(rc == 0)) *bytes_read = actual_size; @@ -324,8 +356,7 @@ static int osd_it_acct_rec(const struct lu_env *env, rec->ispace = rec->bspace = 0; /* retrieve block usage from the DMU accounting object */ - rc = osd_zap_cursor_retrieve_value(env, it->oiq_zc, - (char *)&rec->bspace, + rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace, sizeof(uint64_t), &bytes_read); if (rc) RETURN(rc); @@ -342,14 +373,18 @@ static int osd_it_acct_rec(const struct lu_env *env, if (unlikely(rc != 0)) RETURN(rc); - /* inode accounting is not maintained by DMU, so we use our own ZAP to - * track inode usage */ - rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_dn->dn_object, - za->za_name, sizeof(uint64_t), 1, &rec->ispace); + /* inode accounting is maintained by DMU since 0.7.0 */ + strncpy(info->oti_buf, OSD_DMU_USEROBJ_PREFIX, + OSD_DMU_USEROBJ_PREFIX_LEN); + strlcpy(info->oti_buf + OSD_DMU_USEROBJ_PREFIX_LEN, za->za_name, + sizeof(info->oti_buf) - OSD_DMU_USEROBJ_PREFIX_LEN); + rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object, + it->oiq_obj->oo_dn, info->oti_buf, sizeof(uint64_t), + 1, &rec->ispace); if (rc == -ENOENT) /* user/group has not created any file yet */ CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", - osd->od_svname, za->za_name); + osd->od_svname, info->oti_buf); else if (rc) RETURN(rc); @@ -399,12 +434,11 @@ static int osd_it_acct_load(const struct lu_env *env, it->oiq_zc = zc; it->oiq_reset = 0; - rc = -zap_cursor_retrieve(it->oiq_zc, za); + rc = osd_zap_locate(it, za); if (rc == 0) rc = 1; else if (rc == -ENOENT) rc = 0; - RETURN(rc); } @@ -413,7 +447,7 @@ static int osd_it_acct_load(const struct lu_env *env, * move to the first valid record. * * \param di - osd iterator - * \param key - uid or gid + * \param key - uid or gid or projid * * \retval +ve - di points to exact matched key * \retval 0 - di points to the first valid record @@ -469,28 +503,39 @@ const struct dt_index_operations osd_acct_index_ops = { * \param osd - is the osd_device * \param uid - user id of the inode * \param gid - group id of the inode + * \param projid - project id of the inode * \param space - how many blocks/inodes will be consumed/released * \param oh - osd transaction handle - * \param is_blk - block quota or inode quota? * \param flags - if the operation is write, return no user quota, no * group quota, or sync commit flags to the caller - * \param force - set to 1 when changes are performed by root user and thus - * can't failed with EDQUOT + * \param osd_qid_declare_flags - indicate this is a inode/block accounting + * and whether changes are performed by root user * * \retval 0 - success * \retval -ve - failure */ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, - qid_t uid, qid_t gid, long long space, - struct osd_thandle *oh, bool is_blk, int *flags, - bool force) + qid_t uid, qid_t gid, qid_t projid, long long space, + struct osd_thandle *oh, + enum osd_quota_local_flags *local_flags, + enum osd_qid_declare_flags osd_qid_declare_flags) { - struct osd_thread_info *info = osd_oti_get(env); - struct lquota_id_info *qi = &info->oti_qi; - struct qsd_instance *qsd = osd->od_quota_slave; - int rcu, rcg; /* user & group rc */ + struct osd_thread_info *info = osd_oti_get(env); + struct lquota_id_info *qi = &info->oti_qi; + struct qsd_instance *qsd = NULL; + int rcu, rcg, rcp = 0; /* user & group & project rc */ + struct thandle *th = &oh->ot_super; + bool force = !!(osd_qid_declare_flags & OSD_QID_FORCE) || + th->th_ignore_quota; ENTRY; + if (osd_qid_declare_flags & OSD_QID_INODE) + qsd = osd->od_quota_slave_md; + else if (osd_qid_declare_flags & OSD_QID_BLK) + qsd = osd->od_quota_slave_dt; + else + RETURN(0); + if (unlikely(qsd == NULL)) /* quota slave instance hasn't been allocated yet */ RETURN(0); @@ -499,9 +544,8 @@ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, qi->lqi_id.qid_uid = uid; qi->lqi_type = USRQUOTA; qi->lqi_space = space; - qi->lqi_is_blk = is_blk; - rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); - + qi->lqi_is_blk = !!(osd_qid_declare_flags & OSD_QID_BLK); + rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, local_flags); if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS)) /* ignore EDQUOT & EINPROGRESS when changes are done by root */ rcu = 0; @@ -510,17 +554,31 @@ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, * for group id. This is only for commit write, which has @flags passed * in. See osd_declare_write_commit(). * When force is set to true, we also want to proceed with the gid */ - if (rcu && (rcu != -EDQUOT || flags == NULL)) + if (rcu && (rcu != -EDQUOT || local_flags == NULL)) RETURN(rcu); /* and now group quota */ qi->lqi_id.qid_gid = gid; qi->lqi_type = GRPQUOTA; - rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); - + rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, local_flags); if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS)) /* as before, ignore EDQUOT & EINPROGRESS for root */ rcg = 0; - RETURN(rcu ? rcu : rcg); +#ifdef ZFS_PROJINHERIT + if (rcg && (rcg != -EDQUOT || local_flags == NULL)) + RETURN(rcg); + + /* for project quota */ + if (osd->od_projectused_dn) { + qi->lqi_id.qid_projid = projid; + qi->lqi_type = PRJQUOTA; + rcp = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, + local_flags); + if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS)) + rcp = 0; + } +#endif + + RETURN(rcu ? rcu : (rcg ? rcg : rcp)); }