X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_quota.c;h=45b68ff556443e6b5593d8ca505948053952c61e;hb=refs%2Fchanges%2F93%2F27093%2F40;hp=7faa28732b8b574c02e94254b61c23682a6ac7ec;hpb=08aa217ce49aba1ded52e0f7adb8a607035123fd;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c index 7faa287..45b68ff 100644 --- a/lustre/osd-zfs/osd_quota.c +++ b/lustre/osd-zfs/osd_quota.c @@ -21,7 +21,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012, 2015, Intel Corporation. * Use is subject to license terms. * * Author: Johann Lombardi @@ -29,18 +29,30 @@ #include #include -#include "udmu.h" #include "osd_internal.h" /** - * Helper function to retrieve DMU object id from fid for accounting object + * Helper function to estimate the number of inodes in use for the given + * uid/gid/projid from the block usage */ -uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) +static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes) { - LASSERT(fid_is_acct(fid)); - if (fid_oid(fid) == ACCT_GROUP_OID) - return DMU_GROUPUSED_OBJECT; - return DMU_USERUSED_OBJECT; + uint64_t refdbytes, availbytes, usedobjs, availobjs; + uint64_t uidobjs, bshift; + + /* get fresh statfs info */ + dmu_objset_space(osd->od_os, &refdbytes, &availbytes, + &usedobjs, &availobjs); + + /* estimate the number of objects based on the disk usage */ + bshift = fls64(osd->od_max_blksz) - 1; + uidobjs = osd_objs_count_estimate(refdbytes, usedobjs, + uidbytes >> bshift, bshift); + if (uidbytes > 0) + /* if we have at least 1 byte, we have at least one dnode ... */ + uidobjs = max_t(uint64_t, uidobjs, 1); + + return uidobjs; } /** @@ -48,7 +60,7 @@ uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) */ /** - * Return space usage consumed by a given uid or gid. + * Return space usage consumed by a given uid or gid or projid. * Block usage is accurrate since it is maintained by DMU itself. * However, DMU does not provide inode accounting, so the #inodes in use * is estimated from the block usage and statfs information. @@ -58,7 +70,6 @@ uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) * \param dtrec - is the record to fill with space usage information * \param dtkey - is the id the of the user or group for which we would * like to access disk usage. - * \param capa - is the capability, not used. * * \retval +ve - success : exact match * \retval -ve - failure @@ -66,60 +77,66 @@ uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) static int osd_acct_index_lookup(const struct lu_env *env, struct dt_object *dtobj, struct dt_rec *dtrec, - const struct dt_key *dtkey, - struct lustre_capa *capa) + const struct dt_key *dtkey) { - struct osd_thread_info *info = osd_oti_get(env); - char *buf = info->oti_buf; - struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; - struct osd_object *obj = osd_dt_obj(dtobj); - struct osd_device *osd = osd_obj2dev(obj); - int rc; - uint64_t oid; + struct osd_thread_info *info = osd_oti_get(env); + char *buf = info->oti_buf; + struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; + struct osd_object *obj = osd_dt_obj(dtobj); + struct osd_device *osd = osd_obj2dev(obj); + dnode_t *dn = obj->oo_dn; + size_t buflen = sizeof(info->oti_buf); + int rc; ENTRY; rec->bspace = rec->ispace = 0; - /* convert the 64-bit uid/gid into a string */ - sprintf(buf, "%llx", *((__u64 *)dtkey)); - /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be - * used */ - oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu)); + /* convert the 64-bit uid/gid/projid into a string */ + snprintf(buf, buflen, "%llx", *((__u64 *)dtkey)); + if (unlikely(!dn)) { + CDEBUG(D_QUOTA, "%s: miss accounting obj for %s\n", + osd->od_svname, buf); + + RETURN(-ENOENT); + } /* disk usage (in bytes) is maintained by DMU. * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which - * not associated with any dmu_but_t (see dnode_special_open()). - * As a consequence, we cannot use udmu_zap_lookup() here since it - * requires a valid oo_db. */ - rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1, - &rec->bspace); - if (rc == -ENOENT) - /* user/group has not created anything yet */ + * not associated with any dmu_but_t (see dnode_special_open()). */ + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1, + &rec->bspace); + if (rc == -ENOENT) { + /* user/group/project has not created anything yet */ CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n", osd->od_svname, buf); - else if (rc) + /* -ENOENT is normal case, convert it as 1. */ + rc = 1; + } else if (rc) { RETURN(rc); + } - if (osd->od_quota_iused_est) { + if (!osd_dmu_userobj_accounting_available(osd)) { if (rec->bspace != 0) /* estimate #inodes in use */ - rec->ispace = udmu_objset_user_iused(&osd->od_objset, - rec->bspace); - RETURN(+1); + rec->ispace = osd_objset_user_iused(osd, rec->bspace); + rc = 1; + } else { + snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx", + *((__u64 *)dtkey)); + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, + sizeof(uint64_t), 1, &rec->ispace); + if (rc == -ENOENT) { + CDEBUG(D_QUOTA, + "%s: id %s not found dnode accounting\n", + osd->od_svname, buf); + /* -ENOENT is normal case, convert it as 1. */ + rc = 1; + } else if (rc == 0) { + rc = 1; + } } - /* as for inode accounting, it is not maintained by DMU, so we just - * use our own ZAP to track inode usage */ - rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object, - buf, sizeof(uint64_t), 1, &rec->ispace); - if (rc == -ENOENT) - /* user/group has not created any file yet */ - CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", - osd->od_svname, buf); - else if (rc) - RETURN(rc); - - RETURN(+1); + RETURN(rc); } /** @@ -127,36 +144,45 @@ static int osd_acct_index_lookup(const struct lu_env *env, * * \param dt - osd index object * \param attr - not used - * \param capa - BYPASS_CAPA */ static struct dt_it *osd_it_acct_init(const struct lu_env *env, struct dt_object *dt, - __u32 attr, - struct lustre_capa *capa) + __u32 attr) { - struct osd_thread_info *info = osd_oti_get(env); - struct osd_it_quota *it; - struct lu_object *lo = &dt->do_lu; - struct osd_device *osd = osd_dev(lo->lo_dev); - int rc; + struct osd_thread_info *info = osd_oti_get(env); + struct osd_it_quota *it; + struct osd_object *obj = osd_dt_obj(dt); + struct osd_device *osd = osd_obj2dev(obj); + dnode_t *dn = obj->oo_dn; + int rc; ENTRY; - LASSERT(lu_object_exists(lo)); + if (unlikely(!dn)) { + CDEBUG(D_QUOTA, "%s: Not found in DMU accounting ZAP\n", + osd->od_svname); + + RETURN(ERR_PTR(-ENOENT)); + } if (info == NULL) RETURN(ERR_PTR(-ENOMEM)); - it = &info->oti_it_quota; + OBD_ALLOC_PTR(it); + if (it == NULL) + RETURN(ERR_PTR(-ENOMEM)); + memset(it, 0, sizeof(*it)); - it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo)); + it->oiq_oid = dn->dn_object; /* initialize zap cursor */ - rc = -udmu_zap_cursor_init(&it->oiq_zc, &osd->od_objset, it->oiq_oid,0); - if (rc) + rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0); + if (rc != 0) { + OBD_FREE_PTR(it); RETURN(ERR_PTR(rc)); + } /* take object reference */ - lu_object_get(lo); + lu_object_get(&dt->do_lu); it->oiq_obj = osd_dt_obj(dt); it->oiq_reset = 1; @@ -170,14 +196,40 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, */ static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) { - struct osd_it_quota *it = (struct osd_it_quota *)di; + struct osd_it_quota *it = (struct osd_it_quota *)di; ENTRY; - udmu_zap_cursor_fini(it->oiq_zc); - lu_object_put(env, &it->oiq_obj->oo_dt.do_lu); + + osd_zap_cursor_fini(it->oiq_zc); + osd_object_put(env, it->oiq_obj); + OBD_FREE_PTR(it); + EXIT; } /** + * Locate the first entry that is for space accounting. + */ +static int osd_zap_locate(struct osd_it_quota *it, zap_attribute_t *za) +{ + int rc; + ENTRY; + + while (1) { + rc = -zap_cursor_retrieve(it->oiq_zc, za); + if (rc) + break; + + if (strncmp(za->za_name, OSD_DMU_USEROBJ_PREFIX, + OSD_DMU_USEROBJ_PREFIX_LEN)) + break; + + zap_cursor_advance(it->oiq_zc); + } + + RETURN(rc); +} + +/** * Move on to the next valid entry. * * \param di - osd iterator @@ -189,16 +241,16 @@ static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di) { struct osd_it_quota *it = (struct osd_it_quota *)di; + zap_attribute_t *za = &osd_oti_get(env)->oti_za; int rc; ENTRY; if (it->oiq_reset == 0) zap_cursor_advance(it->oiq_zc); it->oiq_reset = 0; - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32); - if (rc == -ENOENT) /* reached the end */ - RETURN(+1); - RETURN(rc); + + rc = osd_zap_locate(it, za); + RETURN(rc == -ENOENT ? 1 : rc); } /** @@ -210,17 +262,19 @@ static struct dt_key *osd_it_acct_key(const struct lu_env *env, const struct dt_it *di) { struct osd_it_quota *it = (struct osd_it_quota *)di; - struct osd_thread_info *info = osd_oti_get(env); - char *buf = info->oti_buf; - char *p; + zap_attribute_t *za = &osd_oti_get(env)->oti_za; int rc; ENTRY; it->oiq_reset = 0; - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32); + rc = osd_zap_locate(it, za); if (rc) RETURN(ERR_PTR(rc)); - it->oiq_id = simple_strtoull(buf, &p, 16); + + rc = kstrtoull(za->za_name, 16, &it->oiq_id); + if (rc) + CERROR("couldn't parse name %s\n", za->za_name); + RETURN((struct dt_key *) &it->oiq_id); } @@ -236,6 +290,47 @@ static int osd_it_acct_key_size(const struct lu_env *env, RETURN((int)sizeof(uint64_t)); } +/* + * zap_cursor_retrieve read from current record. + * to read bytes we need to call zap_lookup explicitly. + */ +static int osd_zap_cursor_retrieve_value(const struct lu_env *env, + struct osd_it_quota *it, + char *buf, int buf_size, + int *bytes_read) +{ + const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu); + zap_attribute_t *za = &osd_oti_get(env)->oti_za; + zap_cursor_t *zc = it->oiq_zc; + struct osd_device *osd = osd_obj2dev(it->oiq_obj); + int rc, actual_size; + + rc = -zap_cursor_retrieve(zc, za); + if (unlikely(rc != 0)) + return rc; + + if (unlikely(za->za_integer_length <= 0)) + return -ERANGE; + + actual_size = za->za_integer_length * za->za_num_integers; + + if (actual_size > buf_size) { + actual_size = buf_size; + buf_size = actual_size / za->za_integer_length; + } else { + buf_size = za->za_num_integers; + } + + /* use correct special ID to request bytes used */ + rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ? + DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL, + za->za_name, za->za_integer_length, buf_size, buf); + if (likely(rc == 0)) + *bytes_read = actual_size; + + return rc; +} + /** * Return pointer to the record under iterator. * @@ -247,7 +342,7 @@ static int osd_it_acct_rec(const struct lu_env *env, struct dt_rec *dtrec, __u32 attr) { struct osd_thread_info *info = osd_oti_get(env); - char *buf = info->oti_buf; + zap_attribute_t *za = &info->oti_za; struct osd_it_quota *it = (struct osd_it_quota *)di; struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; struct osd_object *obj = it->oiq_obj; @@ -260,33 +355,35 @@ static int osd_it_acct_rec(const struct lu_env *env, rec->ispace = rec->bspace = 0; /* retrieve block usage from the DMU accounting object */ - rc = -udmu_zap_cursor_retrieve_value(env, it->oiq_zc, - (char *)&rec->bspace, - sizeof(uint64_t), &bytes_read); + rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace, + sizeof(uint64_t), &bytes_read); if (rc) RETURN(rc); - if (osd->od_quota_iused_est) { + if (!osd_dmu_userobj_accounting_available(osd)) { if (rec->bspace != 0) /* estimate #inodes in use */ - rec->ispace = udmu_objset_user_iused(&osd->od_objset, - rec->bspace); + rec->ispace = osd_objset_user_iused(osd, rec->bspace); RETURN(0); } /* retrieve key associated with the current cursor */ - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32); - if (rc) + rc = -zap_cursor_retrieve(it->oiq_zc, za); + if (unlikely(rc != 0)) RETURN(rc); - /* inode accounting is not maintained by DMU, so we use our own ZAP to - * track inode usage */ - rc = -zap_lookup(osd->od_objset.os, it->oiq_obj->oo_db->db_object, - buf, sizeof(uint64_t), 1, &rec->ispace); + /* inode accounting is maintained by DMU since 0.7.0 */ + strncpy(info->oti_buf, OSD_DMU_USEROBJ_PREFIX, + OSD_DMU_USEROBJ_PREFIX_LEN); + strlcpy(info->oti_buf + OSD_DMU_USEROBJ_PREFIX_LEN, za->za_name, + sizeof(info->oti_buf) - OSD_DMU_USEROBJ_PREFIX_LEN); + rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object, + it->oiq_obj->oo_dn, info->oti_buf, sizeof(uint64_t), + 1, &rec->ispace); if (rc == -ENOENT) /* user/group has not created any file yet */ CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", - osd->od_svname, buf); + osd->od_svname, info->oti_buf); else if (rc) RETURN(rc); @@ -304,7 +401,7 @@ static __u64 osd_it_acct_store(const struct lu_env *env, struct osd_it_quota *it = (struct osd_it_quota *)di; ENTRY; it->oiq_reset = 0; - RETURN(udmu_zap_cursor_serialize(it->oiq_zc)); + RETURN(osd_zap_cursor_serialize(it->oiq_zc)); } /** @@ -323,23 +420,24 @@ static int osd_it_acct_load(const struct lu_env *env, { struct osd_it_quota *it = (struct osd_it_quota *)di; struct osd_device *osd = osd_obj2dev(it->oiq_obj); + zap_attribute_t *za = &osd_oti_get(env)->oti_za; zap_cursor_t *zc; int rc; ENTRY; /* create new cursor pointing to the new hash */ - rc = -udmu_zap_cursor_init(&zc, &osd->od_objset, it->oiq_oid, hash); + rc = osd_zap_cursor_init(&zc, osd->od_os, it->oiq_oid, hash); if (rc) RETURN(rc); - udmu_zap_cursor_fini(it->oiq_zc); + osd_zap_cursor_fini(it->oiq_zc); it->oiq_zc = zc; it->oiq_reset = 0; - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32); + rc = osd_zap_locate(it, za); if (rc == 0) - RETURN(+1); + rc = 1; else if (rc == -ENOENT) - RETURN(0); + rc = 0; RETURN(rc); } @@ -348,7 +446,7 @@ static int osd_it_acct_load(const struct lu_env *env, * move to the first valid record. * * \param di - osd iterator - * \param key - uid or gid + * \param key - uid or gid or projid * * \retval +ve - di points to exact matched key * \retval 0 - di points to the first valid record @@ -404,26 +502,27 @@ const struct dt_index_operations osd_acct_index_ops = { * \param osd - is the osd_device * \param uid - user id of the inode * \param gid - group id of the inode + * \param projid - project id of the inode * \param space - how many blocks/inodes will be consumed/released * \param oh - osd transaction handle - * \param is_blk - block quota or inode quota? * \param flags - if the operation is write, return no user quota, no * group quota, or sync commit flags to the caller - * \param force - set to 1 when changes are performed by root user and thus - * can't failed with EDQUOT + * \param osd_qid_declare_flags - indicate this is a inode/block accounting + * and whether changes are performed by root user * * \retval 0 - success * \retval -ve - failure */ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, - qid_t uid, qid_t gid, long long space, - struct osd_thandle *oh, bool is_blk, int *flags, - bool force) + qid_t uid, qid_t gid, qid_t projid, long long space, + struct osd_thandle *oh, int *flags, + enum osd_qid_declare_flags osd_qid_declare_flags) { - struct osd_thread_info *info = osd_oti_get(env); - struct lquota_id_info *qi = &info->oti_qi; - struct qsd_instance *qsd = osd->od_quota_slave; - int rcu, rcg; /* user & group rc */ + struct osd_thread_info *info = osd_oti_get(env); + struct lquota_id_info *qi = &info->oti_qi; + struct qsd_instance *qsd = osd->od_quota_slave; + int rcu, rcg, rcp = 0; /* user & group & project rc */ + bool force = !!(osd_qid_declare_flags & OSD_QID_FORCE); ENTRY; if (unlikely(qsd == NULL)) @@ -434,9 +533,8 @@ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, qi->lqi_id.qid_uid = uid; qi->lqi_type = USRQUOTA; qi->lqi_space = space; - qi->lqi_is_blk = is_blk; + qi->lqi_is_blk = !!(osd_qid_declare_flags & OSD_QID_BLK); rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); - if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS)) /* ignore EDQUOT & EINPROGRESS when changes are done by root */ rcu = 0; @@ -452,10 +550,23 @@ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, qi->lqi_id.qid_gid = gid; qi->lqi_type = GRPQUOTA; rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); - if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS)) /* as before, ignore EDQUOT & EINPROGRESS for root */ rcg = 0; - RETURN(rcu ? rcu : rcg); +#ifdef ZFS_PROJINHERIT + if (rcg && (rcg != -EDQUOT || flags == NULL)) + RETURN(rcg); + + /* for project quota */ + if (osd->od_projectused_dn) { + qi->lqi_id.qid_projid = projid; + qi->lqi_type = PRJQUOTA; + rcp = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); + if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS)) + rcp = 0; + } +#endif + + RETURN(rcu ? rcu : (rcg ? rcg : rcp)); }