X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_quota.c;h=180d66ddac506d422241059290cd58a75ad73230;hb=65a8ff5fbe8ca014bd01150ab102d8aa43f78cff;hp=2e9152c6da02704a0702affbe3a499724e03e2cc;hpb=5963af745b3aa14410d5ceb66f8a7b7d6aaf576a;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c index 2e9152c..180d66d 100644 --- a/lustre/osd-zfs/osd_quota.c +++ b/lustre/osd-zfs/osd_quota.c @@ -21,7 +21,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, 2015, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. * Use is subject to license terms. * * Author: Johann Lombardi @@ -32,19 +32,8 @@ #include "osd_internal.h" /** - * Helper function to retrieve DMU object id from fid for accounting object - */ -uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) -{ - LASSERT(fid_is_acct(fid)); - if (fid_oid(fid) == ACCT_GROUP_OID) - return DMU_GROUPUSED_OBJECT; - return DMU_USERUSED_OBJECT; -} - -/** - * Helper function to estimate the number of inodes in use for a give uid/gid - * from the block usage + * Helper function to estimate the number of inodes in use for the given + * uid/gid/projid from the block usage */ static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes) { @@ -71,7 +60,7 @@ static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes) */ /** - * Return space usage consumed by a given uid or gid. + * Return space usage consumed by a given uid or gid or projid. * Block usage is accurrate since it is maintained by DMU itself. * However, DMU does not provide inode accounting, so the #inodes in use * is estimated from the block usage and statfs information. @@ -90,56 +79,64 @@ static int osd_acct_index_lookup(const struct lu_env *env, struct dt_rec *dtrec, const struct dt_key *dtkey) { - struct osd_thread_info *info = osd_oti_get(env); - char *buf = info->oti_buf; - struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; - struct osd_object *obj = osd_dt_obj(dtobj); - struct osd_device *osd = osd_obj2dev(obj); - int rc; - uint64_t oid; + struct osd_thread_info *info = osd_oti_get(env); + char *buf = info->oti_buf; + struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; + struct osd_object *obj = osd_dt_obj(dtobj); + struct osd_device *osd = osd_obj2dev(obj); + dnode_t *dn = obj->oo_dn; + size_t buflen = sizeof(info->oti_buf); + int rc; ENTRY; rec->bspace = rec->ispace = 0; - /* convert the 64-bit uid/gid into a string */ - sprintf(buf, "%llx", *((__u64 *)dtkey)); - /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be - * used */ - oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu)); + /* convert the 64-bit uid/gid/projid into a string */ + snprintf(buf, buflen, "%llx", *((__u64 *)dtkey)); + if (unlikely(!dn)) { + CDEBUG(D_QUOTA, "%s: miss accounting obj for %s\n", + osd->od_svname, buf); + + RETURN(-ENOENT); + } /* disk usage (in bytes) is maintained by DMU. * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which - * not associated with any dmu_but_t (see dnode_special_open()). - * As a consequence, we cannot use udmu_zap_lookup() here since it - * requires a valid oo_dn. */ - rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1, - &rec->bspace); - if (rc == -ENOENT) - /* user/group has not created anything yet */ + * not associated with any dmu_but_t (see dnode_special_open()). */ + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1, + &rec->bspace); + if (rc == -ENOENT) { + /* user/group/project has not created anything yet */ CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n", osd->od_svname, buf); - else if (rc) + /* -ENOENT is normal case, convert it as 1. */ + rc = 1; + } else if (rc) { RETURN(rc); + } - if (osd->od_quota_iused_est) { + if (!osd_dmu_userobj_accounting_available(osd)) { if (rec->bspace != 0) /* estimate #inodes in use */ rec->ispace = osd_objset_user_iused(osd, rec->bspace); - RETURN(+1); + rc = 1; + } else { + snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx", + *((__u64 *)dtkey)); + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, + sizeof(uint64_t), 1, &rec->ispace); + if (rc == -ENOENT) { + CDEBUG(D_QUOTA, + "%s: id %s not found dnode accounting\n", + osd->od_svname, buf); + /* -ENOENT is normal case, convert it as 1. */ + rc = 1; + } else if (rc == 0) { + rc = 1; + } } - /* as for inode accounting, it is not maintained by DMU, so we just - * use our own ZAP to track inode usage */ - rc = -zap_lookup(osd->od_os, obj->oo_dn->dn_object, - buf, sizeof(uint64_t), 1, &rec->ispace); - if (rc == -ENOENT) - /* user/group has not created any file yet */ - CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", - osd->od_svname, buf); - else if (rc) - RETURN(rc); - - RETURN(+1); + RETURN(rc); } /** @@ -152,14 +149,20 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, struct dt_object *dt, __u32 attr) { - struct osd_thread_info *info = osd_oti_get(env); - struct osd_it_quota *it; - struct lu_object *lo = &dt->do_lu; - struct osd_device *osd = osd_dev(lo->lo_dev); - int rc; + struct osd_thread_info *info = osd_oti_get(env); + struct osd_it_quota *it; + struct osd_object *obj = osd_dt_obj(dt); + struct osd_device *osd = osd_obj2dev(obj); + dnode_t *dn = obj->oo_dn; + int rc; ENTRY; - LASSERT(lu_object_exists(lo)); + if (unlikely(!dn)) { + CDEBUG(D_QUOTA, "%s: Not found in DMU accounting ZAP\n", + osd->od_svname); + + RETURN(ERR_PTR(-ENOENT)); + } if (info == NULL) RETURN(ERR_PTR(-ENOMEM)); @@ -169,7 +172,7 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, RETURN(ERR_PTR(-ENOMEM)); memset(it, 0, sizeof(*it)); - it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo)); + it->oiq_oid = dn->dn_object; /* initialize zap cursor */ rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0); @@ -179,7 +182,7 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, } /* take object reference */ - lu_object_get(lo); + lu_object_get(&dt->do_lu); it->oiq_obj = osd_dt_obj(dt); it->oiq_reset = 1; @@ -204,6 +207,29 @@ static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) } /** + * Locate the first entry that is for space accounting. + */ +static int osd_zap_locate(struct osd_it_quota *it, zap_attribute_t *za) +{ + int rc; + ENTRY; + + while (1) { + rc = -zap_cursor_retrieve(it->oiq_zc, za); + if (rc) + break; + + if (strncmp(za->za_name, OSD_DMU_USEROBJ_PREFIX, + OSD_DMU_USEROBJ_PREFIX_LEN)) + break; + + zap_cursor_advance(it->oiq_zc); + } + + RETURN(rc); +} + +/** * Move on to the next valid entry. * * \param di - osd iterator @@ -222,10 +248,9 @@ static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di) if (it->oiq_reset == 0) zap_cursor_advance(it->oiq_zc); it->oiq_reset = 0; - rc = -zap_cursor_retrieve(it->oiq_zc, za); - if (rc == -ENOENT) /* reached the end */ - rc = 1; - RETURN(rc); + + rc = osd_zap_locate(it, za); + RETURN(rc == -ENOENT ? 1 : rc); } /** @@ -242,10 +267,13 @@ static struct dt_key *osd_it_acct_key(const struct lu_env *env, ENTRY; it->oiq_reset = 0; - rc = -zap_cursor_retrieve(it->oiq_zc, za); + rc = osd_zap_locate(it, za); if (rc) RETURN(ERR_PTR(rc)); + rc = kstrtoull(za->za_name, 16, &it->oiq_id); + if (rc) + CERROR("couldn't parse name %s\n", za->za_name); RETURN((struct dt_key *) &it->oiq_id); } @@ -267,15 +295,19 @@ static int osd_it_acct_key_size(const struct lu_env *env, * to read bytes we need to call zap_lookup explicitly. */ static int osd_zap_cursor_retrieve_value(const struct lu_env *env, - zap_cursor_t *zc, char *buf, - int buf_size, int *bytes_read) + struct osd_it_quota *it, + char *buf, int buf_size, + int *bytes_read) { + const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu); zap_attribute_t *za = &osd_oti_get(env)->oti_za; + zap_cursor_t *zc = it->oiq_zc; + struct osd_device *osd = osd_obj2dev(it->oiq_obj); int rc, actual_size; rc = -zap_cursor_retrieve(zc, za); if (unlikely(rc != 0)) - return -rc; + return rc; if (unlikely(za->za_integer_length <= 0)) return -ERANGE; @@ -289,10 +321,10 @@ static int osd_zap_cursor_retrieve_value(const struct lu_env *env, buf_size = za->za_num_integers; } - rc = -zap_lookup(zc->zc_objset, zc->zc_zapobj, - za->za_name, za->za_integer_length, - buf_size, buf); - + /* use correct special ID to request bytes used */ + rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ? + DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL, + za->za_name, za->za_integer_length, buf_size, buf); if (likely(rc == 0)) *bytes_read = actual_size; @@ -323,13 +355,12 @@ static int osd_it_acct_rec(const struct lu_env *env, rec->ispace = rec->bspace = 0; /* retrieve block usage from the DMU accounting object */ - rc = osd_zap_cursor_retrieve_value(env, it->oiq_zc, - (char *)&rec->bspace, + rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace, sizeof(uint64_t), &bytes_read); if (rc) RETURN(rc); - if (osd->od_quota_iused_est) { + if (!osd_dmu_userobj_accounting_available(osd)) { if (rec->bspace != 0) /* estimate #inodes in use */ rec->ispace = osd_objset_user_iused(osd, rec->bspace); @@ -341,14 +372,18 @@ static int osd_it_acct_rec(const struct lu_env *env, if (unlikely(rc != 0)) RETURN(rc); - /* inode accounting is not maintained by DMU, so we use our own ZAP to - * track inode usage */ - rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_dn->dn_object, - za->za_name, sizeof(uint64_t), 1, &rec->ispace); + /* inode accounting is maintained by DMU since 0.7.0 */ + strncpy(info->oti_buf, OSD_DMU_USEROBJ_PREFIX, + OSD_DMU_USEROBJ_PREFIX_LEN); + strlcpy(info->oti_buf + OSD_DMU_USEROBJ_PREFIX_LEN, za->za_name, + sizeof(info->oti_buf) - OSD_DMU_USEROBJ_PREFIX_LEN); + rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object, + it->oiq_obj->oo_dn, info->oti_buf, sizeof(uint64_t), + 1, &rec->ispace); if (rc == -ENOENT) /* user/group has not created any file yet */ CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", - osd->od_svname, za->za_name); + osd->od_svname, info->oti_buf); else if (rc) RETURN(rc); @@ -398,12 +433,11 @@ static int osd_it_acct_load(const struct lu_env *env, it->oiq_zc = zc; it->oiq_reset = 0; - rc = -zap_cursor_retrieve(it->oiq_zc, za); + rc = osd_zap_locate(it, za); if (rc == 0) rc = 1; else if (rc == -ENOENT) rc = 0; - RETURN(rc); } @@ -412,7 +446,7 @@ static int osd_it_acct_load(const struct lu_env *env, * move to the first valid record. * * \param di - osd iterator - * \param key - uid or gid + * \param key - uid or gid or projid * * \retval +ve - di points to exact matched key * \retval 0 - di points to the first valid record @@ -468,28 +502,38 @@ const struct dt_index_operations osd_acct_index_ops = { * \param osd - is the osd_device * \param uid - user id of the inode * \param gid - group id of the inode + * \param projid - project id of the inode * \param space - how many blocks/inodes will be consumed/released * \param oh - osd transaction handle - * \param is_blk - block quota or inode quota? * \param flags - if the operation is write, return no user quota, no * group quota, or sync commit flags to the caller - * \param force - set to 1 when changes are performed by root user and thus - * can't failed with EDQUOT + * \param osd_qid_declare_flags - indicate this is a inode/block accounting + * and whether changes are performed by root user * * \retval 0 - success * \retval -ve - failure */ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, - qid_t uid, qid_t gid, long long space, - struct osd_thandle *oh, bool is_blk, int *flags, - bool force) + qid_t uid, qid_t gid, qid_t projid, long long space, + struct osd_thandle *oh, int *flags, + enum osd_qid_declare_flags osd_qid_declare_flags) { - struct osd_thread_info *info = osd_oti_get(env); - struct lquota_id_info *qi = &info->oti_qi; - struct qsd_instance *qsd = osd->od_quota_slave; - int rcu, rcg; /* user & group rc */ + struct osd_thread_info *info = osd_oti_get(env); + struct lquota_id_info *qi = &info->oti_qi; + struct qsd_instance *qsd = NULL; + int rcu, rcg, rcp = 0; /* user & group & project rc */ + struct thandle *th = &oh->ot_super; + bool force = !!(osd_qid_declare_flags & OSD_QID_FORCE) || + th->th_ignore_quota; ENTRY; + if (osd_qid_declare_flags & OSD_QID_INODE) + qsd = osd->od_quota_slave_md; + else if (osd_qid_declare_flags & OSD_QID_BLK) + qsd = osd->od_quota_slave_dt; + else + RETURN(0); + if (unlikely(qsd == NULL)) /* quota slave instance hasn't been allocated yet */ RETURN(0); @@ -498,9 +542,8 @@ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, qi->lqi_id.qid_uid = uid; qi->lqi_type = USRQUOTA; qi->lqi_space = space; - qi->lqi_is_blk = is_blk; + qi->lqi_is_blk = !!(osd_qid_declare_flags & OSD_QID_BLK); rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); - if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS)) /* ignore EDQUOT & EINPROGRESS when changes are done by root */ rcu = 0; @@ -516,10 +559,23 @@ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, qi->lqi_id.qid_gid = gid; qi->lqi_type = GRPQUOTA; rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); - if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS)) /* as before, ignore EDQUOT & EINPROGRESS for root */ rcg = 0; - RETURN(rcu ? rcu : rcg); +#ifdef ZFS_PROJINHERIT + if (rcg && (rcg != -EDQUOT || flags == NULL)) + RETURN(rcg); + + /* for project quota */ + if (osd->od_projectused_dn) { + qi->lqi_id.qid_projid = projid; + qi->lqi_type = PRJQUOTA; + rcp = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); + if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS)) + rcp = 0; + } +#endif + + RETURN(rcu ? rcu : (rcg ? rcg : rcp)); }