From: Alex Zhuravlev Date: Tue, 29 May 2012 10:54:43 +0000 (+0400) Subject: LU-1305 osd: quota support for osd-zfs X-Git-Tag: 2.2.57~9 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b2a487553e966d8d93a8ad5e54c8ffcce37b9954 LU-1305 osd: quota support for osd-zfs export of accouting information and enforcement Signed-off-by: Alex Zhuravlev Change-Id: Iac5100960a5107277ccd628ba3a682cb9b03c0ed Reviewed-on: http://review.whamcloud.com/2970 Reviewed-by: Johann Lombardi Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c new file mode 100644 index 0000000..616cf5f --- /dev/null +++ b/lustre/osd-zfs/osd_quota.c @@ -0,0 +1,399 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011 Whamcloud, Inc. + * Use is subject to license terms. + * + * Author: Johann Lombardi + */ + +#include +#include +#include "udmu.h" +#include "osd_internal.h" + +/** + * Helper function to retrieve DMU object id from fid for accounting object + */ +uint64_t osd_quota_fid2dmu(const struct lu_fid *fid) +{ + LASSERT(fid_is_acct(fid)); + if (fid_oid(fid) == ACCT_GROUP_OID) + return DMU_GROUPUSED_OBJECT; + return DMU_USERUSED_OBJECT; +} + +/** + * Space Accounting Management + */ + +/** + * Return space usage consumed by a given uid or gid. + * Block usage is accurrate since it is maintained by DMU itself. + * However, DMU does not provide inode accounting, so the #inodes in use + * is estimated from the block usage and statfs information. + * + * \param env - is the environment passed by the caller + * \param dtobj - is the accounting object + * \param dtrec - is the record to fill with space usage information + * \param dtkey - is the id the of the user or group for which we would + * like to access disk usage. + * \param capa - is the capability, not used. + * + * \retval +ve - success : exact match + * \retval -ve - failure + */ +static int osd_acct_index_lookup(const struct lu_env *env, + struct dt_object *dtobj, + struct dt_rec *dtrec, + const struct dt_key *dtkey, + struct lustre_capa *capa) +{ + struct osd_thread_info *info = osd_oti_get(env); + char *buf = info->oti_buf; + struct acct_rec *rec = (struct acct_rec *)dtrec; + struct osd_object *obj = osd_dt_obj(dtobj); + struct osd_device *osd = osd_obj2dev(obj); + int rc; + uint64_t oid; + ENTRY; + + rec->bspace = rec->ispace = 0; + + /* convert the 64-bit uid/gid into a string */ + sprintf(buf, "%llx", *((__u64 *)dtkey)); + /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be + * used */ + oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu)); + + /* disk usage (in bytes) is maintained by DMU. + * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which + * not associated with any dmu_but_t (see dnode_special_open()). + * As a consequence, we cannot use udmu_zap_lookup() here since it + * requires a valid oo_db. */ + rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1, + &rec->bspace); + if (rc == -ENOENT) + /* user/group has not created anything yet */ + CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n", + osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf); + else if (rc) + RETURN(rc); + + if (osd->od_quota_iused_est) { + if (rec->bspace != 0) + /* estimate #inodes in use */ + rec->ispace = udmu_objset_user_iused(&osd->od_objset, + rec->bspace); + RETURN(+1); + } + + /* as for inode accounting, it is not maintained by DMU, so we just + * use our own ZAP to track inode usage */ + rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object, + buf, sizeof(uint64_t), 1, &rec->ispace); + if (rc == -ENOENT) + /* user/group has not created any file yet */ + CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", + osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf); + else if (rc) + RETURN(rc); + + RETURN(+1); +} + +/** + * Initialize osd Iterator for given osd index object. + * + * \param dt - osd index object + * \param attr - not used + * \param capa - BYPASS_CAPA + */ +static struct dt_it *osd_it_acct_init(const struct lu_env *env, + struct dt_object *dt, + __u32 attr, + struct lustre_capa *capa) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct osd_it_quota *it; + struct lu_object *lo = &dt->do_lu; + struct osd_device *osd = osd_dev(lo->lo_dev); + int rc; + ENTRY; + + LASSERT(lu_object_exists(lo)); + + if (info == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + it = &info->oti_it_quota; + memset(it, 0, sizeof(*it)); + it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo)); + + /* initialize zap cursor */ + rc = -udmu_zap_cursor_init(&it->oiq_zc, &osd->od_objset, it->oiq_oid,0); + if (rc) + RETURN(ERR_PTR(rc)); + + /* take object reference */ + lu_object_get(lo); + it->oiq_obj = osd_dt_obj(dt); + it->oiq_reset = 1; + + RETURN((struct dt_it *)it); +} + +/** + * Free given iterator. + * + * \param di - osd iterator + */ +static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + ENTRY; + udmu_zap_cursor_fini(it->oiq_zc); + lu_object_put(env, &it->oiq_obj->oo_dt.do_lu); + EXIT; +} + +/** + * Move on to the next valid entry. + * + * \param di - osd iterator + * + * \retval +ve - iterator reached the end + * \retval 0 - iterator has not reached the end yet + * \retval -ve - unexpected failure + */ +static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + int rc; + ENTRY; + + if (it->oiq_reset == 0) + zap_cursor_advance(it->oiq_zc); + it->oiq_reset = 0; + rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32); + if (rc == -ENOENT) /* reached the end */ + RETURN(+1); + RETURN(rc); +} + +/** + * Return pointer to the key under iterator. + * + * \param di - osd iterator + */ +static struct dt_key *osd_it_acct_key(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + struct osd_thread_info *info = osd_oti_get(env); + char *buf = info->oti_buf; + char *p; + int rc; + ENTRY; + + it->oiq_reset = 0; + rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32); + if (rc) + RETURN(ERR_PTR(rc)); + it->oiq_id = simple_strtoull(buf, &p, 16); + RETURN((struct dt_key *) &it->oiq_id); +} + +/** + * Return size of key under iterator (in bytes) + * + * \param di - osd iterator + */ +static int osd_it_acct_key_size(const struct lu_env *env, + const struct dt_it *di) +{ + ENTRY; + RETURN((int)sizeof(uint64_t)); +} + +/** + * Return pointer to the record under iterator. + * + * \param di - osd iterator + * \param attr - not used + */ +static int osd_it_acct_rec(const struct lu_env *env, + const struct dt_it *di, + struct dt_rec *dtrec, __u32 attr) +{ + struct osd_thread_info *info = osd_oti_get(env); + char *buf = info->oti_buf; + struct osd_it_quota *it = (struct osd_it_quota *)di; + struct acct_rec *rec = (struct acct_rec *)dtrec; + struct osd_object *obj = it->oiq_obj; + struct osd_device *osd = osd_obj2dev(obj); + int bytes_read; + int rc; + ENTRY; + + it->oiq_reset = 0; + rec->ispace = rec->bspace = 0; + + /* retrieve block usage from the DMU accounting object */ + rc = -udmu_zap_cursor_retrieve_value(env, it->oiq_zc, + (char *)&rec->bspace, + sizeof(uint64_t), &bytes_read); + if (rc) + RETURN(rc); + + if (osd->od_quota_iused_est) { + if (rec->bspace != 0) + /* estimate #inodes in use */ + rec->ispace = udmu_objset_user_iused(&osd->od_objset, + rec->bspace); + RETURN(0); + } + + /* retrieve key associated with the current cursor */ + rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32); + if (rc) + RETURN(rc); + + /* inode accounting is not maintained by DMU, so we use our own ZAP to + * track inode usage */ + rc = -zap_lookup(osd->od_objset.os, it->oiq_obj->oo_db->db_object, + buf, sizeof(uint64_t), 1, &rec->ispace); + if (rc == -ENOENT) + /* user/group has not created any file yet */ + CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", + osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf); + else if (rc) + RETURN(rc); + + RETURN(0); +} + +/** + * Returns cookie for current Iterator position. + * + * \param di - osd iterator + */ +static __u64 osd_it_acct_store(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + ENTRY; + it->oiq_reset = 0; + RETURN(udmu_zap_cursor_serialize(it->oiq_zc)); +} + +/** + * Restore iterator from cookie. if the \a hash isn't found, + * restore the first valid record. + * + * \param di - osd iterator + * \param hash - iterator location cookie + * + * \retval +ve - di points to exact matched key + * \retval 0 - di points to the first valid record + * \retval -ve - failure + */ +static int osd_it_acct_load(const struct lu_env *env, + const struct dt_it *di, __u64 hash) +{ + struct osd_it_quota *it = (struct osd_it_quota *)di; + struct osd_device *osd = osd_obj2dev(it->oiq_obj); + zap_cursor_t *zc; + int rc; + ENTRY; + + /* create new cursor pointing to the new hash */ + rc = -udmu_zap_cursor_init(&zc, &osd->od_objset, it->oiq_oid, hash); + if (rc) + RETURN(rc); + udmu_zap_cursor_fini(it->oiq_zc); + it->oiq_zc = zc; + it->oiq_reset = 0; + + rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32); + if (rc == 0) + RETURN(+1); + else if (rc == -ENOENT) + RETURN(0); + RETURN(rc); +} + +/** + * Move Iterator to record specified by \a key, if the \a key isn't found, + * move to the first valid record. + * + * \param di - osd iterator + * \param key - uid or gid + * + * \retval +ve - di points to exact matched key + * \retval 0 - di points to the first valid record + * \retval -ve - failure + */ +static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di, + const struct dt_key *key) +{ + ENTRY; + + /* XXX: like osd_zap_it_get(), API is currently broken */ + LASSERT(*((__u64 *)key) == 0); + + RETURN(osd_it_acct_load(env, di, 0)); +} + +/** + * Release Iterator + * + * \param di - osd iterator + */ +static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di) +{ +} + +/** + * Index and Iterator operations for accounting objects + */ +const struct dt_index_operations osd_acct_index_ops = { + .dio_lookup = osd_acct_index_lookup, + .dio_it = { + .init = osd_it_acct_init, + .fini = osd_it_acct_fini, + .get = osd_it_acct_get, + .put = osd_it_acct_put, + .next = osd_it_acct_next, + .key = osd_it_acct_key, + .key_size = osd_it_acct_key_size, + .rec = osd_it_acct_rec, + .store = osd_it_acct_store, + .load = osd_it_acct_load + } +}; + +/** + * Quota Enforcement Management + * TODO + */