X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_quota.c;h=d9867145facd3a5f40cc95478e1df69d0745d8e5;hb=3222fcd13868a9fcc520af53ae1ff739ba59908f;hp=68e8dd55ae7779b976e5001f0d96f76ce604f13c;hpb=67d925cc206c2d7b215a171fe9d09bd28f9bacb0;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c index 68e8dd5..d986714 100644 --- a/lustre/osd-zfs/osd_quota.c +++ b/lustre/osd-zfs/osd_quota.c @@ -21,15 +21,14 @@ * GPL HEADER END */ /* - * Copyright (c) 2011 Whamcloud, Inc. + * Copyright (c) 2012, 2015, Intel Corporation. * Use is subject to license terms. * * Author: Johann Lombardi */ -#include +#include #include -#include "udmu.h" #include "osd_internal.h" /** @@ -44,6 +43,30 @@ uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) } /** + * Helper function to estimate the number of inodes in use for a give uid/gid + * from the block usage + */ +static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes) +{ + uint64_t refdbytes, availbytes, usedobjs, availobjs; + uint64_t uidobjs, bshift; + + /* get fresh statfs info */ + dmu_objset_space(osd->od_os, &refdbytes, &availbytes, + &usedobjs, &availobjs); + + /* estimate the number of objects based on the disk usage */ + bshift = fls64(osd->od_max_blksz) - 1; + uidobjs = osd_objs_count_estimate(refdbytes, usedobjs, + uidbytes >> bshift, bshift); + if (uidbytes > 0) + /* if we have at least 1 byte, we have at least one dnode ... */ + uidobjs = max_t(uint64_t, uidobjs, 1); + + return uidobjs; +} + +/** * Space Accounting Management */ @@ -58,7 +81,6 @@ uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) * \param dtrec - is the record to fill with space usage information * \param dtkey - is the id the of the user or group for which we would * like to access disk usage. - * \param capa - is the capability, not used. * * \retval +ve - success : exact match * \retval -ve - failure @@ -66,12 +88,11 @@ uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) static int osd_acct_index_lookup(const struct lu_env *env, struct dt_object *dtobj, struct dt_rec *dtrec, - const struct dt_key *dtkey, - struct lustre_capa *capa) + const struct dt_key *dtkey) { struct osd_thread_info *info = osd_oti_get(env); char *buf = info->oti_buf; - struct acct_rec *rec = (struct acct_rec *)dtrec; + struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; struct osd_object *obj = osd_dt_obj(dtobj); struct osd_device *osd = osd_obj2dev(obj); int rc; @@ -91,31 +112,30 @@ static int osd_acct_index_lookup(const struct lu_env *env, * not associated with any dmu_but_t (see dnode_special_open()). * As a consequence, we cannot use udmu_zap_lookup() here since it * requires a valid oo_db. */ - rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1, + rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1, &rec->bspace); if (rc == -ENOENT) /* user/group has not created anything yet */ CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n", - osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf); + osd->od_svname, buf); else if (rc) RETURN(rc); if (osd->od_quota_iused_est) { if (rec->bspace != 0) /* estimate #inodes in use */ - rec->ispace = udmu_objset_user_iused(&osd->od_objset, - rec->bspace); + rec->ispace = osd_objset_user_iused(osd, rec->bspace); RETURN(+1); } /* as for inode accounting, it is not maintained by DMU, so we just * use our own ZAP to track inode usage */ - rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object, + rc = -zap_lookup(osd->od_os, obj->oo_db->db_object, buf, sizeof(uint64_t), 1, &rec->ispace); if (rc == -ENOENT) /* user/group has not created any file yet */ CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", - osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf); + osd->od_svname, buf); else if (rc) RETURN(rc); @@ -127,12 +147,10 @@ static int osd_acct_index_lookup(const struct lu_env *env, * * \param dt - osd index object * \param attr - not used - * \param capa - BYPASS_CAPA */ static struct dt_it *osd_it_acct_init(const struct lu_env *env, struct dt_object *dt, - __u32 attr, - struct lustre_capa *capa) + __u32 attr) { struct osd_thread_info *info = osd_oti_get(env); struct osd_it_quota *it; @@ -146,14 +164,19 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, if (info == NULL) RETURN(ERR_PTR(-ENOMEM)); - it = &info->oti_it_quota; + OBD_ALLOC_PTR(it); + if (it == NULL) + RETURN(ERR_PTR(-ENOMEM)); + memset(it, 0, sizeof(*it)); it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo)); /* initialize zap cursor */ - rc = -udmu_zap_cursor_init(&it->oiq_zc, &osd->od_objset, it->oiq_oid,0); - if (rc) + rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0); + if (rc != 0) { + OBD_FREE_PTR(it); RETURN(ERR_PTR(rc)); + } /* take object reference */ lu_object_get(lo); @@ -170,10 +193,13 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, */ static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) { - struct osd_it_quota *it = (struct osd_it_quota *)di; + struct osd_it_quota *it = (struct osd_it_quota *)di; ENTRY; - udmu_zap_cursor_fini(it->oiq_zc); + + osd_zap_cursor_fini(it->oiq_zc); lu_object_put(env, &it->oiq_obj->oo_dt.do_lu); + OBD_FREE_PTR(it); + EXIT; } @@ -189,15 +215,16 @@ static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di) { struct osd_it_quota *it = (struct osd_it_quota *)di; + zap_attribute_t *za = &osd_oti_get(env)->oti_za; int rc; ENTRY; if (it->oiq_reset == 0) zap_cursor_advance(it->oiq_zc); it->oiq_reset = 0; - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32); + rc = -zap_cursor_retrieve(it->oiq_zc, za); if (rc == -ENOENT) /* reached the end */ - RETURN(+1); + rc = 1; RETURN(rc); } @@ -210,17 +237,16 @@ static struct dt_key *osd_it_acct_key(const struct lu_env *env, const struct dt_it *di) { struct osd_it_quota *it = (struct osd_it_quota *)di; - struct osd_thread_info *info = osd_oti_get(env); - char *buf = info->oti_buf; - char *p; + zap_attribute_t *za = &osd_oti_get(env)->oti_za; int rc; ENTRY; it->oiq_reset = 0; - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32); + rc = -zap_cursor_retrieve(it->oiq_zc, za); if (rc) RETURN(ERR_PTR(rc)); - it->oiq_id = simple_strtoull(buf, &p, 16); + rc = kstrtoull(za->za_name, 16, &it->oiq_id); + RETURN((struct dt_key *) &it->oiq_id); } @@ -236,6 +262,43 @@ static int osd_it_acct_key_size(const struct lu_env *env, RETURN((int)sizeof(uint64_t)); } +/* + * zap_cursor_retrieve read from current record. + * to read bytes we need to call zap_lookup explicitly. + */ +static int osd_zap_cursor_retrieve_value(const struct lu_env *env, + zap_cursor_t *zc, char *buf, + int buf_size, int *bytes_read) +{ + zap_attribute_t *za = &osd_oti_get(env)->oti_za; + int rc, actual_size; + + rc = -zap_cursor_retrieve(zc, za); + if (unlikely(rc != 0)) + return -rc; + + if (unlikely(za->za_integer_length <= 0)) + return -ERANGE; + + actual_size = za->za_integer_length * za->za_num_integers; + + if (actual_size > buf_size) { + actual_size = buf_size; + buf_size = actual_size / za->za_integer_length; + } else { + buf_size = za->za_num_integers; + } + + rc = -zap_lookup(zc->zc_objset, zc->zc_zapobj, + za->za_name, za->za_integer_length, + buf_size, buf); + + if (likely(rc == 0)) + *bytes_read = actual_size; + + return rc; +} + /** * Return pointer to the record under iterator. * @@ -247,9 +310,9 @@ static int osd_it_acct_rec(const struct lu_env *env, struct dt_rec *dtrec, __u32 attr) { struct osd_thread_info *info = osd_oti_get(env); - char *buf = info->oti_buf; + zap_attribute_t *za = &info->oti_za; struct osd_it_quota *it = (struct osd_it_quota *)di; - struct acct_rec *rec = (struct acct_rec *)dtrec; + struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec; struct osd_object *obj = it->oiq_obj; struct osd_device *osd = osd_obj2dev(obj); int bytes_read; @@ -260,33 +323,32 @@ static int osd_it_acct_rec(const struct lu_env *env, rec->ispace = rec->bspace = 0; /* retrieve block usage from the DMU accounting object */ - rc = -udmu_zap_cursor_retrieve_value(env, it->oiq_zc, - (char *)&rec->bspace, - sizeof(uint64_t), &bytes_read); + rc = osd_zap_cursor_retrieve_value(env, it->oiq_zc, + (char *)&rec->bspace, + sizeof(uint64_t), &bytes_read); if (rc) RETURN(rc); if (osd->od_quota_iused_est) { if (rec->bspace != 0) /* estimate #inodes in use */ - rec->ispace = udmu_objset_user_iused(&osd->od_objset, - rec->bspace); + rec->ispace = osd_objset_user_iused(osd, rec->bspace); RETURN(0); } /* retrieve key associated with the current cursor */ - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32); - if (rc) + rc = -zap_cursor_retrieve(it->oiq_zc, za); + if (unlikely(rc != 0)) RETURN(rc); /* inode accounting is not maintained by DMU, so we use our own ZAP to * track inode usage */ - rc = -zap_lookup(osd->od_objset.os, it->oiq_obj->oo_db->db_object, - buf, sizeof(uint64_t), 1, &rec->ispace); + rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_db->db_object, + za->za_name, sizeof(uint64_t), 1, &rec->ispace); if (rc == -ENOENT) /* user/group has not created any file yet */ CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", - osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf); + osd->od_svname, za->za_name); else if (rc) RETURN(rc); @@ -304,7 +366,7 @@ static __u64 osd_it_acct_store(const struct lu_env *env, struct osd_it_quota *it = (struct osd_it_quota *)di; ENTRY; it->oiq_reset = 0; - RETURN(udmu_zap_cursor_serialize(it->oiq_zc)); + RETURN(osd_zap_cursor_serialize(it->oiq_zc)); } /** @@ -323,23 +385,25 @@ static int osd_it_acct_load(const struct lu_env *env, { struct osd_it_quota *it = (struct osd_it_quota *)di; struct osd_device *osd = osd_obj2dev(it->oiq_obj); + zap_attribute_t *za = &osd_oti_get(env)->oti_za; zap_cursor_t *zc; int rc; ENTRY; /* create new cursor pointing to the new hash */ - rc = -udmu_zap_cursor_init(&zc, &osd->od_objset, it->oiq_oid, hash); + rc = osd_zap_cursor_init(&zc, osd->od_os, it->oiq_oid, hash); if (rc) RETURN(rc); - udmu_zap_cursor_fini(it->oiq_zc); + osd_zap_cursor_fini(it->oiq_zc); it->oiq_zc = zc; it->oiq_reset = 0; - rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32); + rc = -zap_cursor_retrieve(it->oiq_zc, za); if (rc == 0) - RETURN(+1); + rc = 1; else if (rc == -ENOENT) - RETURN(0); + rc = 0; + RETURN(rc); } @@ -395,5 +459,67 @@ const struct dt_index_operations osd_acct_index_ops = { /** * Quota Enforcement Management - * TODO */ + +/* + * Wrapper for qsd_op_begin(). + * + * \param env - the environment passed by the caller + * \param osd - is the osd_device + * \param uid - user id of the inode + * \param gid - group id of the inode + * \param space - how many blocks/inodes will be consumed/released + * \param oh - osd transaction handle + * \param is_blk - block quota or inode quota? + * \param flags - if the operation is write, return no user quota, no + * group quota, or sync commit flags to the caller + * \param force - set to 1 when changes are performed by root user and thus + * can't failed with EDQUOT + * + * \retval 0 - success + * \retval -ve - failure + */ +int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, + qid_t uid, qid_t gid, long long space, + struct osd_thandle *oh, bool is_blk, int *flags, + bool force) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lquota_id_info *qi = &info->oti_qi; + struct qsd_instance *qsd = osd->od_quota_slave; + int rcu, rcg; /* user & group rc */ + ENTRY; + + if (unlikely(qsd == NULL)) + /* quota slave instance hasn't been allocated yet */ + RETURN(0); + + /* let's start with user quota */ + qi->lqi_id.qid_uid = uid; + qi->lqi_type = USRQUOTA; + qi->lqi_space = space; + qi->lqi_is_blk = is_blk; + rcu = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); + + if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS)) + /* ignore EDQUOT & EINPROGRESS when changes are done by root */ + rcu = 0; + + /* For non-fatal error, we want to continue to get the noquota flags + * for group id. This is only for commit write, which has @flags passed + * in. See osd_declare_write_commit(). + * When force is set to true, we also want to proceed with the gid */ + if (rcu && (rcu != -EDQUOT || flags == NULL)) + RETURN(rcu); + + /* and now group quota */ + qi->lqi_id.qid_gid = gid; + qi->lqi_type = GRPQUOTA; + rcg = qsd_op_begin(env, qsd, &oh->ot_quota_trans, qi, flags); + + if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS)) + /* as before, ignore EDQUOT & EINPROGRESS for root */ + rcg = 0; + + RETURN(rcu ? rcu : rcg); +}