Whamcloud - gitweb
LU-1305 osd: quota support for osd-zfs
authorAlex Zhuravlev <bzzz@whamcloud.com>
Tue, 29 May 2012 10:54:43 +0000 (14:54 +0400)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 19 Jun 2012 05:12:31 +0000 (01:12 -0400)
export of accouting information and enforcement

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: Iac5100960a5107277ccd628ba3a682cb9b03c0ed
Reviewed-on: http://review.whamcloud.com/2970
Reviewed-by: Johann Lombardi <johann@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/osd-zfs/osd_quota.c [new file with mode: 0644]

diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c
new file mode 100644 (file)
index 0000000..616cf5f
--- /dev/null
@@ -0,0 +1,399 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ * Use is subject to license terms.
+ *
+ * Author: Johann Lombardi <johann@whamcloud.com>
+ */
+
+#include <lu_quota.h>
+#include <obd.h>
+#include "udmu.h"
+#include "osd_internal.h"
+
+/**
+ * Helper function to retrieve DMU object id from fid for accounting object
+ */
+uint64_t osd_quota_fid2dmu(const struct lu_fid *fid)
+{
+       LASSERT(fid_is_acct(fid));
+       if (fid_oid(fid) == ACCT_GROUP_OID)
+               return DMU_GROUPUSED_OBJECT;
+       return DMU_USERUSED_OBJECT;
+}
+
+/**
+ * Space Accounting Management
+ */
+
+/**
+ * Return space usage consumed by a given uid or gid.
+ * Block usage is accurrate since it is maintained by DMU itself.
+ * However, DMU does not provide inode accounting, so the #inodes in use
+ * is estimated from the block usage and statfs information.
+ *
+ * \param env   - is the environment passed by the caller
+ * \param dtobj - is the accounting object
+ * \param dtrec - is the record to fill with space usage information
+ * \param dtkey - is the id the of the user or group for which we would
+ *                like to access disk usage.
+ * \param capa - is the capability, not used.
+ *
+ * \retval +ve - success : exact match
+ * \retval -ve - failure
+ */
+static int osd_acct_index_lookup(const struct lu_env *env,
+                               struct dt_object *dtobj,
+                               struct dt_rec *dtrec,
+                               const struct dt_key *dtkey,
+                               struct lustre_capa *capa)
+{
+       struct osd_thread_info  *info = osd_oti_get(env);
+       char                    *buf  = info->oti_buf;
+       struct acct_rec         *rec  = (struct acct_rec *)dtrec;
+       struct osd_object       *obj = osd_dt_obj(dtobj);
+       struct osd_device       *osd = osd_obj2dev(obj);
+       int                      rc;
+       uint64_t                 oid;
+       ENTRY;
+
+       rec->bspace = rec->ispace = 0;
+
+       /* convert the 64-bit uid/gid into a string */
+       sprintf(buf, "%llx", *((__u64 *)dtkey));
+       /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be
+        * used */
+       oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu));
+
+       /* disk usage (in bytes) is maintained by DMU.
+        * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
+        * not associated with any dmu_but_t (see dnode_special_open()).
+        * As a consequence, we cannot use udmu_zap_lookup() here since it
+        * requires a valid oo_db. */
+       rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1,
+                       &rec->bspace);
+       if (rc == -ENOENT)
+               /* user/group has not created anything yet */
+               CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n",
+                       osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf);
+       else if (rc)
+               RETURN(rc);
+
+       if (osd->od_quota_iused_est) {
+               if (rec->bspace != 0)
+                       /* estimate #inodes in use */
+                       rec->ispace = udmu_objset_user_iused(&osd->od_objset,
+                                                            rec->bspace);
+               RETURN(+1);
+       }
+
+       /* as for inode accounting, it is not maintained by DMU, so we just
+        * use our own ZAP to track inode usage */
+       rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object,
+                        buf, sizeof(uint64_t), 1, &rec->ispace);
+       if (rc == -ENOENT)
+               /* user/group has not created any file yet */
+               CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
+                       osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf);
+       else if (rc)
+               RETURN(rc);
+
+       RETURN(+1);
+}
+
+/**
+ * Initialize osd Iterator for given osd index object.
+ *
+ * \param  dt    - osd index object
+ * \param  attr  - not used
+ * \param  capa  - BYPASS_CAPA
+ */
+static struct dt_it *osd_it_acct_init(const struct lu_env *env,
+                                     struct dt_object *dt,
+                                     __u32 attr,
+                                     struct lustre_capa *capa)
+{
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct osd_it_quota     *it;
+       struct lu_object        *lo   = &dt->do_lu;
+       struct osd_device       *osd  = osd_dev(lo->lo_dev);
+       int                      rc;
+       ENTRY;
+
+       LASSERT(lu_object_exists(lo));
+
+       if (info == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+
+       it = &info->oti_it_quota;
+       memset(it, 0, sizeof(*it));
+       it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo));
+
+       /* initialize zap cursor */
+       rc = -udmu_zap_cursor_init(&it->oiq_zc, &osd->od_objset, it->oiq_oid,0);
+       if (rc)
+               RETURN(ERR_PTR(rc));
+
+       /* take object reference */
+       lu_object_get(lo);
+       it->oiq_obj   = osd_dt_obj(dt);
+       it->oiq_reset = 1;
+
+       RETURN((struct dt_it *)it);
+}
+
+/**
+ * Free given iterator.
+ *
+ * \param  di   - osd iterator
+ */
+static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
+{
+       struct osd_it_quota *it = (struct osd_it_quota *)di;
+       ENTRY;
+       udmu_zap_cursor_fini(it->oiq_zc);
+       lu_object_put(env, &it->oiq_obj->oo_dt.do_lu);
+       EXIT;
+}
+
+/**
+ * Move on to the next valid entry.
+ *
+ * \param  di   - osd iterator
+ *
+ * \retval +ve  - iterator reached the end
+ * \retval   0  - iterator has not reached the end yet
+ * \retval -ve  - unexpected failure
+ */
+static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
+{
+       struct osd_it_quota     *it = (struct osd_it_quota *)di;
+       int                      rc;
+       ENTRY;
+
+       if (it->oiq_reset == 0)
+               zap_cursor_advance(it->oiq_zc);
+       it->oiq_reset = 0;
+       rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
+       if (rc == -ENOENT) /* reached the end */
+               RETURN(+1);
+       RETURN(rc);
+}
+
+/**
+ * Return pointer to the key under iterator.
+ *
+ * \param  di   - osd iterator
+ */
+static struct dt_key *osd_it_acct_key(const struct lu_env *env,
+                                     const struct dt_it *di)
+{
+       struct osd_it_quota     *it = (struct osd_it_quota *)di;
+       struct osd_thread_info  *info = osd_oti_get(env);
+       char                    *buf  = info->oti_buf;
+       char                    *p;
+       int                      rc;
+       ENTRY;
+
+       it->oiq_reset = 0;
+       rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
+       if (rc)
+               RETURN(ERR_PTR(rc));
+       it->oiq_id = simple_strtoull(buf, &p, 16);
+       RETURN((struct dt_key *) &it->oiq_id);
+}
+
+/**
+ * Return size of key under iterator (in bytes)
+ *
+ * \param  di   - osd iterator
+ */
+static int osd_it_acct_key_size(const struct lu_env *env,
+                               const struct dt_it *di)
+{
+       ENTRY;
+       RETURN((int)sizeof(uint64_t));
+}
+
+/**
+ * Return pointer to the record under iterator.
+ *
+ * \param  di    - osd iterator
+ * \param  attr  - not used
+ */
+static int osd_it_acct_rec(const struct lu_env *env,
+                          const struct dt_it *di,
+                          struct dt_rec *dtrec, __u32 attr)
+{
+       struct osd_thread_info  *info = osd_oti_get(env);
+       char                    *buf  = info->oti_buf;
+       struct osd_it_quota     *it = (struct osd_it_quota *)di;
+       struct acct_rec         *rec  = (struct acct_rec *)dtrec;
+       struct osd_object       *obj = it->oiq_obj;
+       struct osd_device       *osd = osd_obj2dev(obj);
+       int                      bytes_read;
+       int                      rc;
+       ENTRY;
+
+       it->oiq_reset = 0;
+       rec->ispace = rec->bspace = 0;
+
+       /* retrieve block usage from the DMU accounting object */
+       rc = -udmu_zap_cursor_retrieve_value(env, it->oiq_zc,
+                                            (char *)&rec->bspace,
+                                            sizeof(uint64_t), &bytes_read);
+       if (rc)
+               RETURN(rc);
+
+       if (osd->od_quota_iused_est) {
+               if (rec->bspace != 0)
+                       /* estimate #inodes in use */
+                       rec->ispace = udmu_objset_user_iused(&osd->od_objset,
+                                                            rec->bspace);
+               RETURN(0);
+       }
+
+       /* retrieve key associated with the current cursor */
+       rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
+       if (rc)
+               RETURN(rc);
+
+       /* inode accounting is not maintained by DMU, so we use our own ZAP to
+        * track inode usage */
+       rc = -zap_lookup(osd->od_objset.os, it->oiq_obj->oo_db->db_object,
+                        buf, sizeof(uint64_t), 1, &rec->ispace);
+       if (rc == -ENOENT)
+               /* user/group has not created any file yet */
+               CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
+                       osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name, buf);
+       else if (rc)
+               RETURN(rc);
+
+       RETURN(0);
+}
+
+/**
+ * Returns cookie for current Iterator position.
+ *
+ * \param  di    - osd iterator
+ */
+static __u64 osd_it_acct_store(const struct lu_env *env,
+                              const struct dt_it *di)
+{
+       struct osd_it_quota *it = (struct osd_it_quota *)di;
+       ENTRY;
+       it->oiq_reset = 0;
+       RETURN(udmu_zap_cursor_serialize(it->oiq_zc));
+}
+
+/**
+ * Restore iterator from cookie. if the \a hash isn't found,
+ * restore the first valid record.
+ *
+ * \param  di    - osd iterator
+ * \param  hash  - iterator location cookie
+ *
+ * \retval +ve  - di points to exact matched key
+ * \retval  0   - di points to the first valid record
+ * \retval -ve  - failure
+ */
+static int osd_it_acct_load(const struct lu_env *env,
+                           const struct dt_it *di, __u64 hash)
+{
+       struct osd_it_quota     *it  = (struct osd_it_quota *)di;
+       struct osd_device       *osd = osd_obj2dev(it->oiq_obj);
+       zap_cursor_t            *zc;
+       int                      rc;
+       ENTRY;
+
+       /* create new cursor pointing to the new hash */
+       rc = -udmu_zap_cursor_init(&zc, &osd->od_objset, it->oiq_oid, hash);
+       if (rc)
+               RETURN(rc);
+       udmu_zap_cursor_fini(it->oiq_zc);
+       it->oiq_zc = zc;
+       it->oiq_reset = 0;
+
+       rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
+       if (rc == 0)
+               RETURN(+1);
+       else if (rc == -ENOENT)
+               RETURN(0);
+       RETURN(rc);
+}
+
+/**
+ * Move Iterator to record specified by \a key, if the \a key isn't found,
+ * move to the first valid record.
+ *
+ * \param  di   - osd iterator
+ * \param  key  - uid or gid
+ *
+ * \retval +ve  - di points to exact matched key
+ * \retval 0    - di points to the first valid record
+ * \retval -ve  - failure
+ */
+static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
+               const struct dt_key *key)
+{
+       ENTRY;
+
+       /* XXX: like osd_zap_it_get(), API is currently broken */
+       LASSERT(*((__u64 *)key) == 0);
+
+       RETURN(osd_it_acct_load(env, di, 0));
+}
+
+/**
+ * Release Iterator
+ *
+ * \param  di   - osd iterator
+ */
+static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
+{
+}
+
+/**
+ * Index and Iterator operations for accounting objects
+ */
+const struct dt_index_operations osd_acct_index_ops = {
+       .dio_lookup = osd_acct_index_lookup,
+       .dio_it     = {
+               .init           = osd_it_acct_init,
+               .fini           = osd_it_acct_fini,
+               .get            = osd_it_acct_get,
+               .put            = osd_it_acct_put,
+               .next           = osd_it_acct_next,
+               .key            = osd_it_acct_key,
+               .key_size       = osd_it_acct_key_size,
+               .rec            = osd_it_acct_rec,
+               .store          = osd_it_acct_store,
+               .load           = osd_it_acct_load
+       }
+};
+
+/**
+ * Quota Enforcement Management
+ * TODO
+ */