many small changes to get rid of udmu wrappers.
Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Change-Id: Ic8746345da1e6695149bacf066be10bf284aecdf
Reviewed-on: http://review.whamcloud.com/9721
Tested-by: Jenkins
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
MODULES := osd_zfs
-osd_zfs-objs := osd_handler.o osd_lproc.o udmu.o osd_quota.o
+osd_zfs-objs := osd_handler.o osd_lproc.o osd_quota.o
osd_zfs-objs += osd_object.o osd_io.o osd_oi.o osd_xattr.o osd_index.o
EXTRA_PRE_CFLAGS += -include @SPL_OBJ@/spl_config.h
endif
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-EXTRA_DIST := $(osd_zfs-objs:%.o=%.c) osd_internal.h udmu.h
+EXTRA_DIST := $(osd_zfs-objs:%.o=%.c) osd_internal.h
struct lu_context_key osd_key;
-static char *root_tag = "osd_mount, rootdb";
-
/* Slab for OSD object allocation */
struct kmem_cache *osd_object_kmem;
dmu_tx_commit(oh->ot_tx);
if (th->th_sync)
- txg_wait_synced(dmu_objset_pool(osd->od_objset.os), txg);
+ txg_wait_synced(dmu_objset_pool(osd->od_os), txg);
RETURN(rc);
}
dmu_tx_t *tx;
ENTRY;
- tx = dmu_tx_create(osd->od_objset.os);
+ tx = dmu_tx_create(osd->od_os);
if (tx == NULL)
RETURN(ERR_PTR(-ENOMEM));
RETURN(th);
}
+/* Estimate the number of objects from a number of blocks
+ *
+ * \param refdbytes byte count that is shifted down by SPA_MAXBLOCKSHIFT
+ *                  to get the number of blocks already in use
+ * \param usedobjs  number of objects already in use
+ * \param nrblocks  number of blocks to convert into an object count
+ *
+ * \retval estimated number of objects that fit into \a nrblocks
+ */
+uint64_t osd_objs_count_estimate(uint64_t refdbytes, uint64_t usedobjs,
+ uint64_t nrblocks)
+{
+ uint64_t est_objs, est_refdblocks, est_usedobjs;
+
+ /* Compute an nrblocks estimate based on the actual number of
+ * dnodes that could fit in the space. Since we don't know the
+ * overhead associated with each dnode (xattrs, SAs, VDEV overhead,
+ * etc) just using DNODE_SHIFT isn't going to give a good estimate.
+ * Instead, compute an estimate based on the average space usage per
+ * dnode, with an upper and lower cap.
+ *
+ * In case there aren't many dnodes or blocks used yet, add a small
+ * correction factor using OSD_DNODE_EST_COUNT and
+ * OSD_DNODE_EST_BLKSHIFT. This correction factor gradually
+ * disappears as the number of real dnodes grows.
+ * This also avoids the need to check for divide-by-zero later.
+ */
+ CLASSERT(OSD_DNODE_MIN_BLKSHIFT > 0);
+ CLASSERT(OSD_DNODE_EST_BLKSHIFT > 0);
+
+ est_refdblocks = (refdbytes >> SPA_MAXBLOCKSHIFT) +
+ (OSD_DNODE_EST_COUNT >> OSD_DNODE_EST_BLKSHIFT);
+ est_usedobjs = usedobjs + OSD_DNODE_EST_COUNT;
+
+ /* Average space/dnode more than maximum dnode size, use max dnode
+ * size to estimate free dnodes from adjusted free blocks count.
+ * OSTs typically use more than one block dnode so this case applies. */
+ if (est_usedobjs <= est_refdblocks * 2) {
+ est_objs = nrblocks;
+
+ /* Average space/dnode smaller than min dnode size (probably due to
+ * metadnode compression), use min dnode size to estimate the number of
+ * objects.
+ * An MDT typically uses below 512 bytes/dnode so this case applies. */
+ } else if (est_usedobjs >= (est_refdblocks << OSD_DNODE_MIN_BLKSHIFT)) {
+ est_objs = nrblocks << OSD_DNODE_MIN_BLKSHIFT;
+
+ /* Between the extremes, we try to use the average size of
+ * existing dnodes to compute the number of dnodes that fit
+ * into nrblocks:
+ *
+ * est_objs = nrblocks * (est_usedobjs / est_refdblocks);
+ *
+ * but this may overflow 64 bits or become 0 if not handled well
+ *
+ * We know nrblocks is below (64 - 17 = 47) bits from
+ * SPA_MAXBLOCKSHIFT, and est_usedobjs is under 48 bits due to
+ * DN_MAX_OBJECT_SHIFT, which means that multiplying them may
+ * get as large as 2 ^ 95.
+ *
+ * We also know (est_usedobjs / est_refdblocks) is between 2 and
+ * 256, due to above checks, we can safely compute this first.
+ * We care more about accuracy on the MDT (many dnodes/block)
+ * which is good because this is where truncation errors are
+ * smallest. This adds 8 bits to nrblocks so we can use 7 bits
+ * to compute a fixed-point fraction and nrblocks can still fit
+ * in 64 bits. */
+ } else {
+ unsigned dnodes_per_block = (est_usedobjs << 7)/est_refdblocks;
+
+ est_objs = (nrblocks * dnodes_per_block) >> 7;
+ }
+ return est_objs;
+}
+
+/*
+ * Fill \a osfs with space and object usage of the objset \a os.
+ *
+ * Block counts are reported in units of the largest ZFS block size
+ * (1 << SPA_MAXBLOCKSHIFT) with a reserve subtracted from the total,
+ * free and available counts. The free object count is the smaller of
+ * the value reported by dmu_objset_space() and the estimate computed by
+ * osd_objs_count_estimate().
+ *
+ * \retval 0 always
+ */
+static int osd_objset_statfs(struct objset *os, struct obd_statfs *osfs)
+{
+ uint64_t refdbytes, availbytes, usedobjs, availobjs;
+ uint64_t est_availobjs;
+ uint64_t reserved;
+
+ dmu_objset_space(os, &refdbytes, &availbytes, &usedobjs,
+ &availobjs);
+
+ /*
+ * ZFS allows multiple block sizes. For statfs, Linux makes no
+ * proper distinction between bsize and frsize. It incorrectly uses
+ * bsize instead of frsize for calculations of free and used blocks,
+ * but bsize is also used as the optimal blocksize. We return the
+ * largest possible block size as IO size for the optimum performance
+ * and scale the free and used blocks count appropriately.
+ */
+ osfs->os_bsize = 1ULL << SPA_MAXBLOCKSHIFT;
+
+ osfs->os_blocks = (refdbytes + availbytes) >> SPA_MAXBLOCKSHIFT;
+ osfs->os_bfree = availbytes >> SPA_MAXBLOCKSHIFT;
+ osfs->os_bavail = osfs->os_bfree; /* no extra root reservation */
+
+ /* Take replication (i.e. number of copies) into account */
+ osfs->os_bavail /= os->os_copies;
+
+ /*
+ * Reserve some space so we don't run into ENOSPC due to grants not
+ * accounting for metadata overhead in ZFS, and to avoid fragmentation.
+ * Rather than report this via os_bavail (which makes users unhappy if
+ * they can't fill the filesystem 100%), reduce os_blocks as well.
+ *
+ * Reserve 0.78% of total space, at least 4MB for small filesystems,
+ * for internal files to be created/unlinked when space is tight.
+ *
+ * NOTE(review): os_blocks is reduced without the MIN() clamp that is
+ * applied to os_bfree and os_bavail below; presumably os_blocks is
+ * never smaller than the reserve - confirm.
+ */
+ CLASSERT(OSD_STATFS_RESERVED_BLKS > 0);
+ if (likely(osfs->os_blocks >=
+ OSD_STATFS_RESERVED_BLKS << OSD_STATFS_RESERVED_SHIFT))
+ reserved = osfs->os_blocks >> OSD_STATFS_RESERVED_SHIFT;
+ else
+ reserved = OSD_STATFS_RESERVED_BLKS;
+
+ osfs->os_blocks -= reserved;
+ osfs->os_bfree -= MIN(reserved, osfs->os_bfree);
+ osfs->os_bavail -= MIN(reserved, osfs->os_bavail);
+
+ /*
+ * The availobjs value returned from dmu_objset_space() is largely
+ * useless, since it reports the number of objects that might
+ * theoretically still fit into the dataset, independent of minor
+ * issues like how much space is actually available in the pool.
+ * Compute a better estimate in osd_objs_count_estimate().
+ */
+ est_availobjs = osd_objs_count_estimate(refdbytes, usedobjs,
+ osfs->os_bfree);
+
+ osfs->os_ffree = min(availobjs, est_availobjs);
+ osfs->os_files = osfs->os_ffree + usedobjs;
+
+ /* ZFS XXX: fill in backing dataset FSID/UUID
+ memcpy(osfs->os_fsid, .... );*/
+
+ /* We're a zfs filesystem. */
+ osfs->os_type = UBERBLOCK_MAGIC;
+
+ /* ZFS XXX: fill in appropriate OS_STATE_{DEGRADED,READONLY} flags
+ osfs->os_state = vf_to_stf(vfsp->vfs_flag);
+ if (sb->s_flags & MS_RDONLY)
+ osfs->os_state = OS_STATE_READONLY;
+ */
+
+ osfs->os_namelen = MAXNAMELEN;
+ osfs->os_maxbytes = OBD_OBJECT_EOF;
+
+ return 0;
+}
+
/*
* Concurrency: shouldn't matter.
*/
int rc;
ENTRY;
- rc = udmu_objset_statfs(&osd->od_objset, osfs);
- if (unlikely(rc))
+ rc = osd_objset_statfs(osd->od_os, osfs);
+ if (unlikely(rc != 0))
RETURN(rc);
+
osfs->os_bavail -= min_t(obd_size,
OSD_GRANT_FOR_LOCAL_OIDS / osfs->os_bsize,
osfs->os_bavail);
RETURN(0);
}
+/*
+ * Maximum cost of inserting one block into an object's block tree:
+ * the maximum depth of the tree times the indirect block size.
+ */
+static int osd_blk_insert_cost(void)
+{
+	/* log2 of the number of blocks needed to reach the maximum
+	 * filesize (that's to say 2^64) */
+	const int blocks_shift = DN_MAX_OFFSET_SHIFT - SPA_MAXBLOCKSHIFT;
+	/* log2 of the number of block pointers that can be stored in an
+	 * indirect block */
+	const int ptrs_shift = DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT;
+	int depth;
+
+	CLASSERT(DN_MAX_INDBLKSHIFT > SPA_BLKPTRSHIFT);
+
+	/* blocks_shift / ptrs_shift is the maximum depth of the tree,
+	 * +1 for rounding purpose */
+	depth = blocks_shift / ptrs_shift + 1;
+
+	return depth * (1 << DN_MAX_INDBLKSHIFT);
+}
+
/*
* Concurrency: doesn't access mutable data.
*/
* estimate the real size consumed by an object */
param->ddp_inodespace = OSD_DNODE_EST_COUNT;
/* per-fragment overhead to be used by the client code */
- param->ddp_grant_frag = udmu_blk_insert_cost();
+ param->ddp_grant_frag = osd_blk_insert_cost();
}
/*
{
struct osd_device *osd = osd_dt_dev(d);
CDEBUG(D_HA, "syncing OSD %s\n", LUSTRE_OSD_ZFS_NAME);
- txg_wait_synced(dmu_objset_pool(osd->od_objset.os), 0ULL);
+ txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL);
return 0;
}
static int osd_commit_async(const struct lu_env *env, struct dt_device *dev)
{
struct osd_device *osd = osd_dt_dev(dev);
- tx_state_t *tx = &dmu_objset_pool(osd->od_objset.os)->dp_tx;
+ tx_state_t *tx = &dmu_objset_pool(osd->od_os)->dp_tx;
uint64_t txg;
mutex_enter(&tx->tx_sync_lock);
CERROR("%s: *** setting device %s read-only ***\n",
osd->od_svname, LUSTRE_OSD_ZFS_NAME);
osd->od_rdonly = 1;
- spa_freeze(dmu_objset_spa(osd->od_objset.os));
+ spa_freeze(dmu_objset_spa(osd->od_os));
RETURN(0);
}
osd->od_xattr_in_sa = (newval == ZFS_XATTR_SA);
}
+/*
+ * Own the dataset named by o->od_mntdev and load the state the OSD needs
+ * from it: the ZPL version entry, the SA attribute table and the id of
+ * the root object. Also check that user/group space accounting is
+ * available on the objset.
+ *
+ * On any failure the objset is disowned and o->od_os is reset to NULL,
+ * so callers testing o->od_os never see a pointer to a disowned objset.
+ *
+ * \retval 0 on success
+ * \retval negative errno on failure
+ */
+static int osd_objset_open(struct osd_device *o)
+{
+	uint64_t version = ZPL_VERSION;
+	uint64_t sa_obj;
+	int rc;
+	ENTRY;
+
+	rc = -dmu_objset_own(o->od_mntdev, DMU_OST_ZFS, B_FALSE, o, &o->od_os);
+	if (rc) {
+		/* nothing is owned, make sure the cleanup below skips it */
+		o->od_os = NULL;
+		GOTO(out, rc);
+	}
+
+	/* Check ZFS version */
+	rc = -zap_lookup(o->od_os, MASTER_NODE_OBJ,
+			 ZPL_VERSION_STR, 8, 1, &version);
+	if (rc) {
+		CERROR("%s: Error looking up ZPL VERSION\n", o->od_mntdev);
+		/*
+		 * We can't return ENOENT because that would mean the objset
+		 * didn't exist.
+		 */
+		GOTO(out, rc = -EIO);
+	}
+
+	rc = -zap_lookup(o->od_os, MASTER_NODE_OBJ,
+			 ZFS_SA_ATTRS, 8, 1, &sa_obj);
+	if (rc)
+		GOTO(out, rc);
+
+	rc = -sa_setup(o->od_os, sa_obj, zfs_attr_table,
+		       ZPL_END, &o->z_attr_table);
+	if (rc)
+		GOTO(out, rc);
+
+	rc = -zap_lookup(o->od_os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ,
+			 8, 1, &o->od_rootid);
+	if (rc) {
+		CERROR("%s: lookup for root failed: rc = %d\n",
+		       o->od_svname, rc);
+		GOTO(out, rc);
+	}
+
+	/* Check that user/group usage tracking is supported */
+	if (!dmu_objset_userused_enabled(o->od_os) ||
+	    DMU_USERUSED_DNODE(o->od_os)->dn_type != DMU_OT_USERGROUP_USED ||
+	    DMU_GROUPUSED_DNODE(o->od_os)->dn_type != DMU_OT_USERGROUP_USED) {
+		CERROR("%s: Space accounting not supported by this target, "
+		       "aborting\n", o->od_svname);
+		/* rc is still 0 from the lookup above, so it has to be
+		 * assigned here for the failure to reach the caller */
+		GOTO(out, rc = -ENOTSUPP);
+	}
+
+out:
+	if (rc != 0 && o->od_os != NULL) {
+		dmu_objset_disown(o->od_os, o);
+		/* don't leave a pointer to the disowned objset behind */
+		o->od_os = NULL;
+	}
+
+	RETURN(rc);
+}
+
static int osd_mount(const struct lu_env *env,
struct osd_device *o, struct lustre_cfg *cfg)
{
int rc;
ENTRY;
- if (o->od_objset.os != NULL)
+ if (o->od_os != NULL)
RETURN(0);
if (mntdev == NULL || svname == NULL)
if (server_name_is_ost(o->od_svname))
o->od_is_ost = 1;
- rc = -udmu_objset_open(o->od_mntdev, &o->od_objset);
+ rc = osd_objset_open(o);
if (rc) {
- CERROR("can't open objset %s: %d\n", o->od_mntdev, rc);
+ CERROR("%s: can't open objset %s: rc = %d\n", o->od_svname,
+ o->od_mntdev, rc);
RETURN(rc);
}
- ds = dmu_objset_ds(o->od_objset.os);
- dp = dmu_objset_pool(o->od_objset.os);
+ ds = dmu_objset_ds(o->od_os);
+ dp = dmu_objset_pool(o->od_os);
LASSERT(ds);
LASSERT(dp);
dsl_pool_config_enter(dp, FTAG);
rc = dsl_prop_register(ds, "xattr", osd_xattr_changed_cb, o);
dsl_pool_config_exit(dp, FTAG);
if (rc)
- CERROR("%s: cat not register xattr callback, ignore: %d\n",
- o->od_svname, rc);
+ CWARN("%s: can't register xattr callback, ignore: rc=%d\n",
+ o->od_svname, rc);
- rc = __osd_obj2dbuf(env, o->od_objset.os, o->od_objset.root,
- &rootdb, root_tag);
+ rc = __osd_obj2dbuf(env, o->od_os, o->od_rootid, &rootdb);
if (rc) {
- CERROR("udmu_obj2dbuf() failed with error %d\n", rc);
- udmu_objset_close(&o->od_objset);
+ CERROR("%s: obj2dbuf() failed: rc = %d\n", o->od_svname, rc);
+ dmu_objset_disown(o->od_os, o);
+ o->od_os = NULL;
RETURN(rc);
}
o->od_root = rootdb->db_object;
- sa_buf_rele(rootdb, root_tag);
+ sa_buf_rele(rootdb, osd_obj_tag);
/* 1. initialize oi before any file create or file open */
rc = osd_oi_init(env, o);
CERROR("%s: lost %d pinned dbuf(s)\n", o->od_svname,
atomic_read(&o->od_zerocopy_pin));
- if (o->od_objset.os != NULL)
- udmu_objset_close(&o->od_objset);
+ if (o->od_os != NULL) {
+ /* force a txg sync to get all commit callbacks */
+ txg_wait_synced(dmu_objset_pool(o->od_os), 0ULL);
+
+ /* close the object set */
+ dmu_objset_disown(o->od_os, o);
+
+ o->od_os = NULL;
+ }
EXIT;
}
osd_shutdown(env, o);
osd_oi_fini(env, o);
- if (o->od_objset.os) {
- ds = dmu_objset_ds(o->od_objset.os);
+ if (o->od_os) {
+ ds = dmu_objset_ds(o->od_os);
rc = dsl_prop_unregister(ds, "xattr", osd_xattr_changed_cb, o);
if (rc)
CERROR("%s: dsl_prop_unregister xattr error %d\n",
o->arc_prune_cb = NULL;
}
osd_sync(env, lu2dt_dev(d));
- txg_wait_callbacks(spa_get_dsl(dmu_objset_spa(o->od_objset.os)));
+ txg_wait_callbacks(spa_get_dsl(dmu_objset_spa(o->od_os)));
}
rc = osd_procfs_fini(o);
RETURN(ERR_PTR(rc));
}
- if (o->od_objset.os)
+ if (o->od_os)
osd_umount(env, o);
RETURN(NULL);
*exp = class_conn2export(&conn);
- spin_lock(&osd->od_objset.lock);
+ spin_lock(&obd->obd_dev_lock);
osd->od_connects++;
- spin_unlock(&osd->od_objset.lock);
+ spin_unlock(&obd->obd_dev_lock);
RETURN(0);
}
ENTRY;
/* Only disconnect the underlying layers on the final disconnect. */
- spin_lock(&osd->od_objset.lock);
+ spin_lock(&obd->obd_dev_lock);
osd->od_connects--;
if (osd->od_connects == 0)
release = 1;
- spin_unlock(&osd->od_objset.lock);
+ spin_unlock(&obd->obd_dev_lock);
rc = class_disconnect(exp); /* bz 9811 */
#include <sys/sa_impl.h>
#include <sys/txg.h>
+/*
+ * Check the type of the dnode behind \a db: returns 1 if it is
+ * DMU_OT_DIRECTORY_CONTENTS or DMU_OT_USERGROUP_USED, 0 otherwise.
+ */
+static inline int osd_object_is_zap(dmu_buf_t *db)
+{
+	dmu_buf_impl_t *impl = (dmu_buf_impl_t *)db;
+	int is_zap;
+
+	/* dn_type is read inside the DB_DNODE_ENTER/DB_DNODE_EXIT section */
+	DB_DNODE_ENTER(impl);
+	switch (DB_DNODE(impl)->dn_type) {
+	case DMU_OT_DIRECTORY_CONTENTS:
+	case DMU_OT_USERGROUP_USED:
+		is_zap = 1;
+		break;
+	default:
+		is_zap = 0;
+		break;
+	}
+	DB_DNODE_EXIT(impl);
+
+	return is_zap;
+}
+
+/* We don't actually have direct access to the zap_hashbits() function
+ * so just pretend like we do for now. If this ever breaks we can look at
+ * it at that time. */
+#define zap_hashbits(zc) 48
+/*
+ * Serialize the position of \a zc in the form Lustre readdir expects.
+ *
+ * ZFS hash format:
+ * | cd (16 bits) | hash (48 bits) |
+ * Lustre needs it in another form:
+ * |0| hash (48 bit) | cd (15 bit) |
+ * so that it is a full 64-bit ordered hash which Lustre readdir can use
+ * to merge the readdir hashes from multiple directory stripes uniformly
+ * on the client.
+ * The sign bit matters as well: the hash range should be in [0, 2^63-1]
+ * because loff_t (for llseek) needs to be a positive value, which means
+ * only the low 15 bits of the "cd" field are kept.
+ */
+uint64_t osd_zap_cursor_serialize(zap_cursor_t *zc)
+{
+	/* drop the top bit of the ZFS value first */
+	uint64_t raw = zap_cursor_serialize(zc) & (~0ULL >> 1);
+	/* what is left of "cd" moves down to the low bits ... */
+	uint64_t cd = raw >> zap_hashbits(zc);
+	/* ... and the hash moves up right below the sign bit */
+	uint64_t hash = raw << (63 - zap_hashbits(zc));
+
+	return cd | hash;
+}
+
+/*
+ * Initialize \a zc on ZAP object \a id of \a os at the position encoded in
+ * \a dirhash, which uses the bit layout produced by
+ * osd_zap_cursor_serialize() and is converted back to the ZFS layout here.
+ */
+void osd_zap_cursor_init_serialized(zap_cursor_t *zc, struct objset *os,
+				    uint64_t id, uint64_t dirhash)
+{
+	/* the low bits go back above the hash, the top bit is cleared */
+	uint64_t upper = (dirhash << zap_hashbits(zc)) & (~0ULL >> 1);
+	/* the remaining bits go back to the bottom */
+	uint64_t lower = dirhash >> (63 - zap_hashbits(zc));
+
+	zap_cursor_init_serialized(zc, os, id, upper | lower);
+}
+
+/*
+ * Allocate a ZAP cursor for object \a id of \a os, position it at
+ * \a dirhash through osd_zap_cursor_init_serialized() and hand it back
+ * in \a zc. \a zc is left untouched on failure.
+ *
+ * \retval 0 on success
+ * \retval -ENOMEM if the cursor cannot be allocated
+ */
+int osd_zap_cursor_init(zap_cursor_t **zc, struct objset *os,
+			uint64_t id, uint64_t dirhash)
+{
+	zap_cursor_t *cursor;
+
+	OBD_ALLOC_PTR(cursor);
+	if (likely(cursor != NULL)) {
+		osd_zap_cursor_init_serialized(cursor, os, id, dirhash);
+		*zc = cursor;
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+/* Finish the ZAP cursor \a zc with zap_cursor_fini() and then free the
+ * cursor itself with OBD_FREE_PTR(), the counterpart of the
+ * OBD_ALLOC_PTR() done in osd_zap_cursor_init(). */
+void osd_zap_cursor_fini(zap_cursor_t *zc)
+{
+ zap_cursor_fini(zc);
+ OBD_FREE_PTR(zc);
+}
+
+/*
+ * Position the caller-provided cursor \a zc on the ZAP behind \a o at the
+ * position \a dirhash.
+ *
+ * \a dirhash is in the layout produced by osd_zap_cursor_serialize(), so
+ * it has to go through osd_zap_cursor_init_serialized() to be converted
+ * back to the ZFS layout, the same way osd_obj_cursor_init() does.
+ * Passing it to zap_cursor_init_serialized() directly would position the
+ * cursor on the wrong entry for any non-zero hash.
+ */
+static inline void osd_obj_cursor_init_serialized(zap_cursor_t *zc,
+						  struct osd_object *o,
+						  uint64_t dirhash)
+{
+	struct osd_device *d = osd_obj2dev(o);
+
+	osd_zap_cursor_init_serialized(zc, d->od_os,
+				       o->oo_db->db_object, dirhash);
+}
+
+/*
+ * Allocate a cursor over the ZAP behind \a o, positioned at \a dirhash.
+ * The result of osd_zap_cursor_init() is returned unchanged.
+ */
+static inline int osd_obj_cursor_init(zap_cursor_t **zc, struct osd_object *o,
+				      uint64_t dirhash)
+{
+	return osd_zap_cursor_init(zc, osd_obj2dev(o)->od_os,
+				   o->oo_db->db_object, dirhash);
+}
+
static struct dt_it *osd_index_it_init(const struct lu_env *env,
struct dt_object *dt,
__u32 unused,
struct osd_thread_info *info = osd_oti_get(env);
struct osd_zap_it *it;
struct osd_object *obj = osd_dt_obj(dt);
- struct osd_device *osd = osd_obj2dev(obj);
struct lu_object *lo = &dt->do_lu;
+ int rc;
ENTRY;
/* XXX: check capa ? */
LASSERT(lu_object_exists(lo));
LASSERT(obj->oo_db);
- LASSERT(udmu_object_is_zap(obj->oo_db));
+ LASSERT(osd_object_is_zap(obj->oo_db));
LASSERT(info);
it = &info->oti_it_zap;
- if (udmu_zap_cursor_init(&it->ozi_zc, &osd->od_objset,
- obj->oo_db->db_object, 0))
- RETURN(ERR_PTR(-ENOMEM));
+ rc = osd_obj_cursor_init(&it->ozi_zc, obj, 0);
+ if (rc != 0)
+ RETURN(ERR_PTR(rc));
it->ozi_obj = obj;
it->ozi_capa = capa;
obj = it->ozi_obj;
- udmu_zap_cursor_fini(it->ozi_zc);
+ osd_zap_cursor_fini(it->ozi_zc);
lu_object_put(env, &obj->oo_dt.do_lu);
EXIT;
* next/finish. */
}
-int udmu_zap_cursor_retrieve_key(const struct lu_env *env,
- zap_cursor_t *zc, char *key, int max)
-{
- zap_attribute_t *za = &osd_oti_get(env)->oti_za;
- int err;
-
- if ((err = zap_cursor_retrieve(zc, za)))
- return err;
-
- if (key)
- strcpy(key, za->za_name);
-
- return 0;
-}
-
-/*
- * zap_cursor_retrieve read from current record.
- * to read bytes we need to call zap_lookup explicitly.
- */
-int udmu_zap_cursor_retrieve_value(const struct lu_env *env,
- zap_cursor_t *zc, char *buf,
- int buf_size, int *bytes_read)
-{
- zap_attribute_t *za = &osd_oti_get(env)->oti_za;
- int err, actual_size;
-
- if ((err = zap_cursor_retrieve(zc, za)))
- return err;
-
- if (za->za_integer_length <= 0)
- return (ERANGE);
-
- actual_size = za->za_integer_length * za->za_num_integers;
-
- if (actual_size > buf_size) {
- actual_size = buf_size;
- buf_size = actual_size / za->za_integer_length;
- } else {
- buf_size = za->za_num_integers;
- }
-
- err = -zap_lookup(zc->zc_objset, zc->zc_zapobj,
- za->za_name, za->za_integer_length,
- buf_size, buf);
-
- if (!err)
- *bytes_read = actual_size;
-
- return err;
-}
-
static inline void osd_it_append_attrs(struct lu_dirent *ent, __u32 attr,
int len, __u16 type)
{
struct dt_object *o,
struct lu_fid *fid)
{
+ struct osd_device *osd = osd_obj2dev(osd_dt_obj(o));
struct lustre_mdt_attrs *lma;
- udmu_objset_t *uos = &osd_obj2dev(osd_dt_obj(o))->od_objset;
struct lu_buf buf;
sa_handle_t *sa_hdl;
nvlist_t *nvbuf = NULL;
/* first of all, get parent dnode from own attributes */
LASSERT(osd_dt_obj(o)->oo_db);
- rc = -sa_handle_get(uos->os, osd_dt_obj(o)->oo_db->db_object,
+ rc = -sa_handle_get(osd->od_os, osd_dt_obj(o)->oo_db->db_object,
NULL, SA_HDL_PRIVATE, &sa_hdl);
if (rc)
RETURN(rc);
dnode = ZFS_NO_OBJECT;
- rc = -sa_lookup(sa_hdl, SA_ZPL_PARENT(uos), &dnode, 8);
+ rc = -sa_lookup(sa_hdl, SA_ZPL_PARENT(osd), &dnode, 8);
sa_handle_destroy(sa_hdl);
if (rc)
RETURN(rc);
/* now get EA buffer */
- rc = __osd_xattr_load(uos, dnode, &nvbuf);
+ rc = __osd_xattr_load(osd, dnode, &nvbuf);
if (rc)
GOTO(regular, rc);
/* no LMA attribute in SA, let's try regular EA */
/* first of all, get parent dnode storing regular EA */
- rc = -sa_handle_get(uos->os, dnode, NULL, SA_HDL_PRIVATE, &sa_hdl);
+ rc = -sa_handle_get(osd->od_os, dnode, NULL, SA_HDL_PRIVATE, &sa_hdl);
if (rc)
GOTO(out, rc);
dnode = ZFS_NO_OBJECT;
- rc = -sa_lookup(sa_hdl, SA_ZPL_XATTR(uos), &dnode, 8);
+ rc = -sa_lookup(sa_hdl, SA_ZPL_XATTR(osd), &dnode, 8);
sa_handle_destroy(sa_hdl);
if (rc)
GOTO(out, rc);
buf.lb_len = sizeof(osd_oti_get(env)->oti_buf);
/* now try to find LMA */
- rc = __osd_xattr_get_large(env, uos, dnode, &buf,
+ rc = __osd_xattr_get_large(env, osd, dnode, &buf,
XATTR_NAME_LMA, &size);
if (rc == 0 && size >= sizeof(*lma)) {
lma = buf.lb_buf;
int rc;
ENTRY;
- LASSERT(udmu_object_is_zap(obj->oo_db));
+ LASSERT(osd_object_is_zap(obj->oo_db));
if (name[0] == '.') {
if (name[1] == 0) {
}
}
- rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object,
+ rc = -zap_lookup(osd->od_os, obj->oo_db->db_object,
(char *)key, 8, sizeof(oti->oti_zde) / 8,
(void *)&oti->oti_zde);
memcpy(rec, &oti->oti_zde.lzd_fid, sizeof(struct lu_fid));
oh = container_of0(th, struct osd_thandle, ot_super);
LASSERT(obj->oo_db);
- LASSERT(udmu_object_is_zap(obj->oo_db));
+ LASSERT(osd_object_is_zap(obj->oo_db));
dmu_tx_hold_bonus(oh->ot_tx, obj->oo_db->db_object);
dmu_tx_hold_zap(oh->ot_tx, obj->oo_db->db_object, TRUE, (char *)key);
ENTRY;
LASSERT(parent->oo_db);
- LASSERT(udmu_object_is_zap(parent->oo_db));
+ LASSERT(osd_object_is_zap(parent->oo_db));
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(parent));
} else if (name[1] == '.' && name[2] == 0) {
/* update parent dnode in the child.
* later it will be used to generate ".." */
- udmu_objset_t *uos = &osd->od_objset;
rc = osd_object_sa_update(parent,
- SA_ZPL_PARENT(uos),
+ SA_ZPL_PARENT(osd),
&child->oo_db->db_object,
8, oh);
GOTO(out, rc);
oti->oti_zde.lzd_fid = *fid;
/* Insert (key,oid) into ZAP */
- rc = -zap_add(osd->od_objset.os, parent->oo_db->db_object,
+ rc = -zap_add(osd->od_os, parent->oo_db->db_object,
(char *)key, 8, sizeof(oti->oti_zde) / 8,
(void *)&oti->oti_zde, oh->ot_tx);
oh = container_of0(th, struct osd_thandle, ot_super);
LASSERT(obj->oo_db);
- LASSERT(udmu_object_is_zap(obj->oo_db));
+ LASSERT(osd_object_is_zap(obj->oo_db));
dmu_tx_hold_zap(oh->ot_tx, obj->oo_db->db_object, TRUE, (char *)key);
ENTRY;
LASSERT(obj->oo_db);
- LASSERT(udmu_object_is_zap(obj->oo_db));
+ LASSERT(osd_object_is_zap(obj->oo_db));
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
}
/* Remove key from the ZAP */
- rc = -zap_remove(osd->od_objset.os, zap_db->db_object,
+ rc = -zap_remove(osd->od_os, zap_db->db_object,
(char *) key, oh->ot_tx);
if (unlikely(rc && rc != -ENOENT))
{
struct osd_zap_it *it = (struct osd_zap_it *)di;
struct osd_object *obj = it->ozi_obj;
- struct osd_device *osd = osd_obj2dev(obj);
char *name = (char *)key;
int rc;
ENTRY;
LASSERT(it);
LASSERT(it->ozi_zc);
- udmu_zap_cursor_fini(it->ozi_zc);
-
- if (udmu_zap_cursor_init(&it->ozi_zc, &osd->od_objset,
- obj->oo_db->db_object, 0))
- RETURN(-ENOMEM);
+ /* reset the cursor */
+ zap_cursor_fini(it->ozi_zc);
+ osd_obj_cursor_init_serialized(it->ozi_zc, obj, 0);
/* XXX: implementation of the API is broken at the moment */
LASSERT(((const char *)key)[0] == 0);
if (unlikely(rc != 0))
GOTO(out, rc);
- lde->lde_hash = cpu_to_le64(udmu_zap_cursor_serialize(it->ozi_zc));
+ lde->lde_hash = cpu_to_le64(osd_zap_cursor_serialize(it->ozi_zc));
namelen = strlen(za->za_name);
if (namelen > NAME_MAX)
GOTO(out, rc = -EOVERFLOW);
if (it->ozi_pos <= 2)
pos = it->ozi_pos;
else
- pos = udmu_zap_cursor_serialize(it->ozi_zc);
+ pos = osd_zap_cursor_serialize(it->ozi_zc);
RETURN(pos);
}
{
struct osd_zap_it *it = (struct osd_zap_it *)di;
struct osd_object *obj = it->ozi_obj;
- struct osd_device *osd = osd_obj2dev(obj);
zap_attribute_t *za = &osd_oti_get(env)->oti_za;
int rc;
ENTRY;
- udmu_zap_cursor_fini(it->ozi_zc);
- if (udmu_zap_cursor_init(&it->ozi_zc, &osd->od_objset,
- obj->oo_db->db_object, hash))
- RETURN(-ENOMEM);
+ /* reset the cursor */
+ zap_cursor_fini(it->ozi_zc);
+ osd_obj_cursor_init_serialized(it->ozi_zc, obj, hash);
if (hash <= 2) {
it->ozi_pos = hash;
rc = osd_prepare_key_uint64(obj, k, key);
- rc = -zap_lookup_uint64(osd->od_objset.os, obj->oo_db->db_object,
+ rc = -zap_lookup_uint64(osd->od_os, obj->oo_db->db_object,
k, rc, obj->oo_recusize, obj->oo_recsize,
(void *)rec);
RETURN(rc == 0 ? 1 : rc);
rc = osd_prepare_key_uint64(obj, k, key);
/* Insert (key,oid) into ZAP */
- rc = -zap_add_uint64(osd->od_objset.os, obj->oo_db->db_object,
+ rc = -zap_add_uint64(osd->od_os, obj->oo_db->db_object,
k, rc, obj->oo_recusize, obj->oo_recsize,
(void *)rec, oh->ot_tx);
RETURN(rc);
rc = osd_prepare_key_uint64(obj, k, key);
/* Remove binary key from the ZAP */
- rc = -zap_remove_uint64(osd->od_objset.os, obj->oo_db->db_object,
+ rc = -zap_remove_uint64(osd->od_os, obj->oo_db->db_object,
k, rc, oh->ot_tx);
RETURN(rc);
}
*((__u64 *)key));
zap_cursor_fini(it->ozi_zc);
- memset(it->ozi_zc, 0, sizeof(*it->ozi_zc));
- zap_cursor_init(it->ozi_zc, osd->od_objset.os, obj->oo_db->db_object);
+ zap_cursor_init(it->ozi_zc, osd->od_os, obj->oo_db->db_object);
it->ozi_reset = 1;
RETURN(+1);
rc = osd_prepare_key_uint64(obj, k, (const struct dt_key *)za->za_name);
- rc = -zap_lookup_uint64(osd->od_objset.os, obj->oo_db->db_object,
+ rc = -zap_lookup_uint64(osd->od_os, obj->oo_db->db_object,
k, rc, obj->oo_recusize, obj->oo_recsize,
(void *)rec);
RETURN(rc);
int rc;
ENTRY;
- /* close the current cursor */
+ /* reset the cursor */
zap_cursor_fini(it->ozi_zc);
-
- /* create a new one starting at hash */
- memset(it->ozi_zc, 0, sizeof(*it->ozi_zc));
- zap_cursor_init_serialized(it->ozi_zc, osd->od_objset.os,
+ zap_cursor_init_serialized(it->ozi_zc, osd->od_os,
obj->oo_db->db_object, hash);
it->ozi_reset = 0;
/* XXX: dmu_object_next() does NOT find dnodes allocated
* in the current non-committed txg, so we force txg
* commit to find all existing dnodes ... */
- txg_wait_synced(dmu_objset_pool(dev->od_objset.os), 0ULL);
+ txg_wait_synced(dmu_objset_pool(dev->od_os), 0ULL);
RETURN((struct dt_it *)it);
}
struct osd_metadnode_it *it)
{
struct osd_device *dev = it->mit_dev;
- udmu_objset_t *uos = &dev->od_objset;
int rc;
/* can go negative on the very first access to the iterator
it->mit_prefetched_dnode = it->mit_pos;
while (it->mit_prefetched < OTABLE_PREFETCH) {
- rc = -dmu_object_next(uos->os, &it->mit_prefetched_dnode,
+ rc = -dmu_object_next(dev->od_os, &it->mit_prefetched_dnode,
B_FALSE, 0);
if (unlikely(rc != 0))
break;
/* dmu_prefetch() was exported in 0.6.2, if you use with
* an older release, just comment it out - this is an
* optimization */
- dmu_prefetch(uos->os, it->mit_prefetched_dnode, 0, 0);
+ dmu_prefetch(dev->od_os, it->mit_prefetched_dnode, 0, 0);
it->mit_prefetched++;
}
struct osd_metadnode_it *it = (struct osd_metadnode_it *)di;
struct lustre_mdt_attrs *lma;
struct osd_device *dev = it->mit_dev;
- udmu_objset_t *uos = &dev->od_objset;
nvlist_t *nvbuf = NULL;
uchar_t *v;
__u64 dnode;
dnode = it->mit_pos;
do {
- rc = -dmu_object_next(uos->os, &it->mit_pos, B_FALSE, 0);
+ rc = -dmu_object_next(dev->od_os, &it->mit_pos, B_FALSE, 0);
if (unlikely(rc != 0))
GOTO(out, rc = 1);
it->mit_prefetched--;
/* LMA is required for this to be a Lustre object.
* If there is no xattr skip it. */
- rc = __osd_xattr_load(uos, it->mit_pos, &nvbuf);
+ rc = __osd_xattr_load(dev, it->mit_pos, &nvbuf);
if (unlikely(rc != 0))
continue;
LASSERT(obj->oo_db != NULL);
if (likely(feat == &dt_directory_features)) {
- if (udmu_object_is_zap(obj->oo_db))
+ if (osd_object_is_zap(obj->oo_db))
dt->do_index_ops = &osd_dir_ops;
else
RETURN(-ENOTDIR);
} else if (unlikely(feat == &dt_acct_features)) {
LASSERT(fid_is_acct(lu_object_fid(&dt->do_lu)));
dt->do_index_ops = &osd_acct_index_ops;
- } else if (udmu_object_is_zap(obj->oo_db) &&
+ } else if (osd_object_is_zap(obj->oo_db) &&
dt->do_index_ops == NULL) {
/* For index file, we don't support variable key & record sizes
* and the key has to be unique */
#endif
#include <sys/arc.h>
-
#include <sys/nvpair.h>
-
#include <sys/zfs_znode.h>
-#include "udmu.h"
+#include <sys/zap.h>
#define LUSTRE_ROOT_FID_SEQ 0
#define DMU_OSD_SVNAME "svname"
#define OSD_GFP_IO (GFP_NOFS | __GFP_HIGHMEM)
+/* Statfs space reservation for grant, fragmentation, and unlink space. */
+#define OSD_STATFS_RESERVED_BLKS (1ULL << (22 - SPA_MAXBLOCKSHIFT)) /* 4MB */
+#define OSD_STATFS_RESERVED_SHIFT (7) /* reserve 0.78% of all space */
+
+/* Statfs {minimum, safe estimate, and maximum} dnodes per block */
+#define OSD_DNODE_MIN_BLKSHIFT (SPA_MAXBLOCKSHIFT - DNODE_SHIFT) /* 17-9 =8 */
+#define OSD_DNODE_EST_BLKSHIFT (SPA_MAXBLOCKSHIFT - 12) /* 17-12=5 */
+#define OSD_DNODE_EST_COUNT 1024
+
+#define OSD_GRANT_FOR_LOCAL_OIDS (2ULL << 20) /* 2MB for last_rcvd, ... */
+
/**
* Iterator's in-memory data structure for quota file.
*/
/* super-class */
struct dt_device od_dt_dev;
/* information about underlying file system */
- udmu_objset_t od_objset;
+ struct objset *od_os;
+ uint64_t od_rootid; /* id of root znode */
+ /* SA attr mapping->id,
+ * name is the same as in ZFS to use defines SA_ZPL_...*/
+ sa_attr_type_t *z_attr_table;
/*
* Fid Capability
qid_t uid, qid_t gid, long long space,
struct osd_thandle *oh, bool is_blk, int *flags,
bool force);
+uint64_t osd_objs_count_estimate(uint64_t refdbytes, uint64_t usedobjs,
+ uint64_t nrblocks);
/*
* Helpers.
static inline struct objset * osd_dtobj2objset(struct dt_object *o)
{
- return osd_dev(o->do_lu.lo_dev)->od_objset.os;
+ return osd_dev(o->do_lu.lo_dev)->od_os;
}
static inline int osd_invariant(const struct osd_object *obj)
int osd_procfs_init(struct osd_device *osd, const char *name);
int osd_procfs_fini(struct osd_device *osd);
-int udmu_zap_cursor_retrieve_key(const struct lu_env *env,
- zap_cursor_t *zc, char *key, int max);
-int udmu_zap_cursor_retrieve_value(const struct lu_env *env,
- zap_cursor_t *zc, char *buf,
- int buf_size, int *bytes_read);
-
/* osd_object.c */
+extern char *osd_obj_tag;
void osd_object_sa_dirty_rele(struct osd_thandle *oh);
int __osd_obj2dbuf(const struct lu_env *env, objset_t *os,
- uint64_t oid, dmu_buf_t **dbp, void *tag);
+ uint64_t oid, dmu_buf_t **dbp);
struct lu_object *osd_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *d);
int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type,
void *buf, uint32_t buflen, struct osd_thandle *oh);
-int __osd_zap_create(const struct lu_env *env, udmu_objset_t *uos,
+int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
dmu_buf_t **zap_dbp, dmu_tx_t *tx, struct lu_attr *la,
- uint64_t parent, void *tag, zap_flags_t flags);
-int __osd_object_create(const struct lu_env *env, udmu_objset_t *uos,
+ uint64_t parent, zap_flags_t flags);
+int __osd_object_create(const struct lu_env *env, struct osd_device *osd,
dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la,
- uint64_t parent, void *tag);
-int __osd_object_free(udmu_objset_t *uos, uint64_t oid, dmu_tx_t *tx);
+ uint64_t parent);
/* osd_oi.c */
int osd_oi_init(const struct lu_env *env, struct osd_device *o);
const struct dt_index_features *feat);
int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
obd_seq seq, struct lu_seq_range *range);
+void osd_zap_cursor_init_serialized(zap_cursor_t *zc, struct objset *os,
+ uint64_t id, uint64_t dirhash);
+int osd_zap_cursor_init(zap_cursor_t **zc, struct objset *os,
+ uint64_t id, uint64_t dirhash);
+void osd_zap_cursor_fini(zap_cursor_t *zc);
+uint64_t osd_zap_cursor_serialize(zap_cursor_t *zc);
/* osd_xattr.c */
-int __osd_xattr_load(udmu_objset_t *uos, uint64_t dnode, nvlist_t **sa_xattr);
-int __osd_xattr_get_large(const struct lu_env *env, udmu_objset_t *uos,
+int __osd_xattr_load(struct osd_device *osd, uint64_t dnode,
+ nvlist_t **sa_xattr);
+int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
uint64_t xattr, struct lu_buf *buf,
const char *name, int *sizep);
int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
size = old_size - *pos;
}
- rc = -dmu_read(osd->od_objset.os, obj->oo_db->db_object, *pos, size,
+ rc = -dmu_read(osd->od_os, obj->oo_db->db_object, *pos, size,
buf->lb_buf, DMU_READ_PREFETCH);
if (rc == 0) {
rc = size;
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
struct osd_thandle *oh;
uint64_t offset = *pos;
int rc;
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
- dmu_write(osd->od_objset.os, obj->oo_db->db_object, offset,
+ dmu_write(osd->od_os, obj->oo_db->db_object, offset,
(uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx);
write_lock(&obj->oo_attr_lock);
if (obj->oo_attr.la_size < offset + buf->lb_len) {
/* osd_object_sa_update() will be copying directly from oo_attr
* into dbuf. any update within a single txg will copy the
* most actual */
- rc = osd_object_sa_update(obj, SA_ZPL_SIZE(uos),
+ rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
&obj->oo_attr.la_size, 8, oh);
if (unlikely(rc))
GOTO(out, rc);
/* backend zfs filesystem might be configured to store multiple data
* copies */
- space *= osd->od_objset.os->os_copies;
+ space *= osd->od_os->os_copies;
space = toqb(space);
CDEBUG(D_QUOTA, "writting %d pages, reserving "LPD64"K of quota "
"space\n", npages, space);
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
struct osd_thandle *oh;
uint64_t new_size = 0;
int i, rc = 0;
}
if (lnb[i].lnb_page->mapping == (void *)obj) {
- dmu_write(osd->od_objset.os, obj->oo_db->db_object,
+ dmu_write(osd->od_os, obj->oo_db->db_object,
lnb[i].lnb_file_offset, lnb[i].lnb_len,
kmap(lnb[i].lnb_page), oh->ot_tx);
kunmap(lnb[i].lnb_page);
/* osd_object_sa_update() will be copying directly from
* oo_attr into dbuf. any update within a single txg will copy
* the most actual */
- rc = osd_object_sa_update(obj, SA_ZPL_SIZE(uos),
- &obj->oo_attr.la_size, 8, oh);
+ rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
+ &obj->oo_attr.la_size, 8, oh);
} else {
write_unlock(&obj->oo_attr_lock);
}
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
struct osd_thandle *oh;
__u64 len;
int rc = 0;
len = end - start;
write_unlock(&obj->oo_attr_lock);
- rc = __osd_object_punch(osd->od_objset.os, obj->oo_db, oh->ot_tx,
+ rc = __osd_object_punch(osd->od_os, obj->oo_db, oh->ot_tx,
obj->oo_attr.la_size, start, len);
/* set new size */
if (len == DMU_OBJECT_END) {
write_lock(&obj->oo_attr_lock);
obj->oo_attr.la_size = start;
write_unlock(&obj->oo_attr_lock);
- rc = osd_object_sa_update(obj, SA_ZPL_SIZE(uos),
- &obj->oo_attr.la_size, 8, oh);
+ rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
+ &obj->oo_attr.la_size, 8, oh);
}
RETURN(rc);
}
#include <sys/sa_impl.h>
#include <sys/txg.h>
-static char *osd_obj_tag = "osd_object";
+char *osd_obj_tag = "osd_object";
static struct dt_object_operations osd_obj_ops;
static struct lu_object_operations osd_lu_obj_ops;
}
static int
-osd_object_sa_init(struct osd_object *obj, udmu_objset_t *uos)
+osd_object_sa_init(struct osd_object *obj, struct osd_device *o)
{
int rc;
LASSERT(obj->oo_sa_hdl == NULL);
LASSERT(obj->oo_db != NULL);
- rc = -sa_handle_get(uos->os, obj->oo_db->db_object, obj,
+ rc = -sa_handle_get(o->od_os, obj->oo_db->db_object, obj,
SA_HDL_PRIVATE, &obj->oo_sa_hdl);
if (rc)
return rc;
/* Cache the xattr object id, valid for the life of the object */
- rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_XATTR(uos), &obj->oo_xattr, 8);
+ rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_XATTR(o), &obj->oo_xattr, 8);
if (rc == -ENOENT) {
obj->oo_xattr = ZFS_NO_OBJECT;
rc = 0;
/*
* Retrieve the attributes of a DMU object
*/
-int __osd_object_attr_get(const struct lu_env *env, udmu_objset_t *uos,
+int __osd_object_attr_get(const struct lu_env *env, struct osd_device *o,
struct osd_object *obj, struct lu_attr *la)
{
struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
LASSERT(obj->oo_db != NULL);
- rc = -sa_handle_get(uos->os, obj->oo_db->db_object, NULL,
+ rc = -sa_handle_get(o->od_os, obj->oo_db->db_object, NULL,
SA_HDL_PRIVATE, &sa_hdl);
if (rc)
RETURN(rc);
la->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | LA_TYPE |
LA_SIZE | LA_UID | LA_GID | LA_FLAGS | LA_NLINK;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(uos), NULL, osa->atime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(uos), NULL, osa->mtime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(uos), NULL, osa->ctime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(uos), NULL, &osa->mode, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(uos), NULL, &osa->size, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(uos), NULL, &osa->nlink, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(uos), NULL, &osa->uid, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(uos), NULL, &osa->gid, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(uos), NULL, &osa->flags, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(o), NULL, osa->atime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(o), NULL, osa->mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(o), NULL, osa->ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(o), NULL, &osa->mode, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(o), NULL, &osa->size, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(o), NULL, &osa->nlink, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(o), NULL, &osa->uid, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(o), NULL, &osa->gid, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(o), NULL, &osa->flags, 8);
rc = -sa_bulk_lookup(sa_hdl, bulk, cnt);
if (rc)
la->la_size = osa->size;
if (S_ISCHR(la->la_mode) || S_ISBLK(la->la_mode)) {
- rc = -sa_lookup(sa_hdl, SA_ZPL_RDEV(uos), &osa->rdev, 8);
+ rc = -sa_lookup(sa_hdl, SA_ZPL_RDEV(o), &osa->rdev, 8);
if (rc)
GOTO(out_bulk, rc);
la->la_rdev = osa->rdev;
}
int __osd_obj2dbuf(const struct lu_env *env, objset_t *os,
- uint64_t oid, dmu_buf_t **dbp, void *tag)
+ uint64_t oid, dmu_buf_t **dbp)
{
dmu_object_info_t *doi = &osd_oti_get(env)->oti_doi;
int rc;
- LASSERT(tag);
-
- rc = -sa_buf_hold(os, oid, tag, dbp);
+ rc = -sa_buf_hold(os, oid, osd_obj_tag, dbp);
if (rc)
return rc;
dmu_object_info_from_db(*dbp, doi);
if (unlikely (oid != DMU_USERUSED_OBJECT &&
oid != DMU_GROUPUSED_OBJECT && doi->doi_bonus_type != DMU_OT_SA)) {
- sa_buf_rele(*dbp, tag);
+ sa_buf_rele(*dbp, osd_obj_tag);
*dbp = NULL;
return -EINVAL;
}
int osd_object_init0(const struct lu_env *env, struct osd_object *obj)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
int rc = 0;
ENTRY;
/* object exist */
- rc = osd_object_sa_init(obj, uos);
+ rc = osd_object_sa_init(obj, osd);
if (rc)
RETURN(rc);
/* cache attrs in object */
- rc = __osd_object_attr_get(env, &osd->od_objset,
- obj, &obj->oo_attr);
+ rc = __osd_object_attr_get(env, osd, obj, &obj->oo_attr);
if (rc)
RETURN(rc);
rc = osd_fid_lookup(env, osd, lu_object_fid(l), &oid);
if (rc == 0) {
LASSERT(obj->oo_db == NULL);
- rc = __osd_obj2dbuf(env, osd->od_objset.os, oid,
- &obj->oo_db, osd_obj_tag);
+ rc = __osd_obj2dbuf(env, osd->od_os, oid, &obj->oo_db);
if (rc != 0) {
CERROR("%s: lookup "DFID"/"LPX64" failed: rc = %d\n",
osd->od_svname, PFID(lu_object_fid(l)), oid, rc);
struct osd_thandle *oh)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
dmu_buf_t *db = obj->oo_db;
zap_attribute_t *za = &osd_oti_get(env)->oti_za;
uint64_t oid = db->db_object, xid;
dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
- rc = -udmu_zap_cursor_init(&zc, uos, oid, 0);
+ rc = osd_zap_cursor_init(&zc, osd->od_os, oid, 0);
if (rc)
goto out;
BUG_ON(za->za_integer_length != sizeof(uint64_t));
BUG_ON(za->za_num_integers != 1);
- rc = -zap_lookup(uos->os, obj->oo_xattr, za->za_name,
+ rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
sizeof(uint64_t), 1, &xid);
if (rc) {
CERROR("%s: xattr lookup failed: rc = %d\n",
if (rc == -ENOENT)
rc = 0;
out_err:
- udmu_zap_cursor_fini(zc);
+ osd_zap_cursor_fini(zc);
}
out:
if (rc && tx->tx_err == 0)
RETURN(rc);
}
-int __osd_object_free(udmu_objset_t *uos, uint64_t oid, dmu_tx_t *tx)
-{
- LASSERT(uos->objects != 0);
- spin_lock(&uos->lock);
- uos->objects--;
- spin_unlock(&uos->lock);
-
- return -dmu_object_free(uos->os, oid, tx);
-}
-
/*
* Delete a DMU object
*
dmu_tx_t *tx, void *tag)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
uint64_t xid;
zap_attribute_t *za = &osd_oti_get(env)->oti_za;
zap_cursor_t *zc;
/* zap holding xattrs */
if (obj->oo_xattr != ZFS_NO_OBJECT) {
- rc = -udmu_zap_cursor_init(&zc, uos, obj->oo_xattr, 0);
+ rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
if (rc)
return rc;
while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
BUG_ON(za->za_integer_length != sizeof(uint64_t));
BUG_ON(za->za_num_integers != 1);
- rc = -zap_lookup(uos->os, obj->oo_xattr, za->za_name,
+ rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
sizeof(uint64_t), 1, &xid);
if (rc) {
CERROR("%s: lookup xattr %s failed: rc = %d\n",
osd->od_svname, za->za_name, rc);
continue;
}
- rc = __osd_object_free(uos, xid, tx);
+ rc = -dmu_object_free(osd->od_os, xid, tx);
if (rc)
CERROR("%s: fetch xattr %s failed: rc = %d\n",
osd->od_svname, za->za_name, rc);
zap_cursor_advance(zc);
}
- udmu_zap_cursor_fini(zc);
+ osd_zap_cursor_fini(zc);
- rc = __osd_object_free(uos, obj->oo_xattr, tx);
+ rc = -dmu_object_free(osd->od_os, obj->oo_xattr, tx);
if (rc)
CERROR("%s: freeing xattr failed: rc = %d\n",
osd->od_svname, rc);
}
- return __osd_object_free(uos, obj->oo_db->db_object, tx);
+ return -dmu_object_free(osd->od_os, obj->oo_db->db_object, tx);
}
static int osd_object_destroy(const struct lu_env *env,
zapid = osd_get_name_n_idx(env, osd, fid, buf);
/* remove obj ref from index dir (it depends) */
- rc = -zap_remove(osd->od_objset.os, zapid, buf, oh->ot_tx);
+ rc = -zap_remove(osd->od_os, zapid, buf, oh->ot_tx);
if (rc) {
CERROR("%s: zap_remove() failed: rc = %d\n",
osd->od_svname, rc);
/* Remove object from inode accounting. It is not fatal for the destroy
* operation if something goes wrong while updating accounting, but we
* still log an error message to notify the administrator */
- rc = -zap_increment_int(osd->od_objset.os, osd->od_iusr_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
obj->oo_attr.la_uid, -1, oh->ot_tx);
if (rc)
CERROR("%s: failed to remove "DFID" from accounting ZAP for usr"
" %d: rc = %d\n", osd->od_svname, PFID(fid),
obj->oo_attr.la_uid, rc);
- rc = -zap_increment_int(osd->od_objset.os, osd->od_igrp_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
obj->oo_attr.la_gid, -1, oh->ot_tx);
if (rc)
CERROR("%s: failed to remove "DFID" from accounting ZAP for grp"
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
struct osd_thandle *oh;
struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
sa_bulk_attr_t *bulk;
if ((valid & LA_UID) && (la->la_uid != obj->oo_attr.la_uid)) {
/* Update user accounting. Failure isn't fatal, but we still
* log an error message */
- rc = -zap_increment_int(osd->od_objset.os, osd->od_iusr_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
la->la_uid, 1, oh->ot_tx);
if (rc)
CERROR("%s: failed to update accounting ZAP for user "
"%d (%d)\n", osd->od_svname, la->la_uid, rc);
- rc = -zap_increment_int(osd->od_objset.os, osd->od_iusr_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
obj->oo_attr.la_uid, -1, oh->ot_tx);
if (rc)
CERROR("%s: failed to update accounting ZAP for user "
if ((valid & LA_GID) && (la->la_gid != obj->oo_attr.la_gid)) {
/* Update group accounting. Failure isn't fatal, but we still
* log an error message */
- rc = -zap_increment_int(osd->od_objset.os, osd->od_igrp_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
la->la_gid, 1, oh->ot_tx);
if (rc)
CERROR("%s: failed to update accounting ZAP for user "
"%d (%d)\n", osd->od_svname, la->la_gid, rc);
- rc = -zap_increment_int(osd->od_objset.os, osd->od_igrp_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
obj->oo_attr.la_gid, -1, oh->ot_tx);
if (rc)
CERROR("%s: failed to update accounting ZAP for user "
cnt = 0;
if (valid & LA_ATIME) {
osa->atime[0] = obj->oo_attr.la_atime = la->la_atime;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL,
osa->atime, 16);
}
if (valid & LA_MTIME) {
osa->mtime[0] = obj->oo_attr.la_mtime = la->la_mtime;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL,
osa->mtime, 16);
}
if (valid & LA_CTIME) {
osa->ctime[0] = obj->oo_attr.la_ctime = la->la_ctime;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL,
osa->ctime, 16);
}
if (valid & LA_MODE) {
obj->oo_attr.la_mode = (obj->oo_attr.la_mode & S_IFMT) |
(la->la_mode & ~S_IFMT);
osa->mode = obj->oo_attr.la_mode;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL,
&osa->mode, 8);
}
if (valid & LA_SIZE) {
osa->size = obj->oo_attr.la_size = la->la_size;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL,
&osa->size, 8);
}
if (valid & LA_NLINK) {
osa->nlink = obj->oo_attr.la_nlink = la->la_nlink;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL,
&osa->nlink, 8);
}
if (valid & LA_RDEV) {
osa->rdev = obj->oo_attr.la_rdev = la->la_rdev;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL,
&osa->rdev, 8);
}
if (valid & LA_FLAGS) {
/* many flags are not supported by zfs, so ensure a good cached
* copy */
obj->oo_attr.la_flags = attrs_zfs2fs(osa->flags);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL,
&osa->flags, 8);
}
if (valid & LA_UID) {
osa->uid = obj->oo_attr.la_uid = la->la_uid;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL,
&osa->uid, 8);
}
if (valid & LA_GID) {
osa->gid = obj->oo_attr.la_gid = la->la_gid;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(uos), NULL,
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL,
&osa->gid, 8);
}
obj->oo_attr.la_valid |= valid;
RETURN(rc);
}
-int __osd_attr_init(const struct lu_env *env, udmu_objset_t *uos, uint64_t oid,
- dmu_tx_t *tx, struct lu_attr *la, uint64_t parent)
+int __osd_attr_init(const struct lu_env *env, struct osd_device *osd,
+ uint64_t oid, dmu_tx_t *tx, struct lu_attr *la,
+ uint64_t parent)
{
sa_bulk_attr_t *bulk;
sa_handle_t *sa_hdl;
osa->size = la->la_size;
/* Now add in all of the "SA" attributes */
- rc = -sa_handle_get(uos->os, oid, NULL, SA_HDL_PRIVATE, &sa_hdl);
+ rc = -sa_handle_get(osd->od_os, oid, NULL, SA_HDL_PRIVATE, &sa_hdl);
if (rc)
return rc;
* work around the problem. See ORI-610.
*/
cnt = 0;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(uos), NULL, &osa->mode, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(uos), NULL, &osa->size, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(uos), NULL, &gen, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(uos), NULL, &osa->uid, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(uos), NULL, &osa->gid, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(uos), NULL, &parent, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(uos), NULL, &osa->flags, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(uos), NULL, osa->atime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(uos), NULL, osa->mtime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(uos), NULL, osa->ctime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(uos), NULL, crtime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(uos), NULL, &osa->nlink, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(uos), NULL, &osa->rdev, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL, &parent, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, crtime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
rc = -sa_replace_all_by_template(sa_hdl, bulk, cnt, tx);
* dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
* to a transaction group.
*/
-int __osd_object_create(const struct lu_env *env, udmu_objset_t *uos,
+int __osd_object_create(const struct lu_env *env, struct osd_device *osd,
dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la,
- uint64_t parent, void *tag)
+ uint64_t parent)
{
uint64_t oid;
int rc;
- LASSERT(tag);
/* Assert that the transaction has been assigned to a
transaction group. */
LASSERT(tx->tx_txg != 0);
/* Create a new DMU object. */
- oid = dmu_object_alloc(uos->os, DMU_OT_PLAIN_FILE_CONTENTS, 0,
+ oid = dmu_object_alloc(osd->od_os, DMU_OT_PLAIN_FILE_CONTENTS, 0,
DMU_OT_SA, DN_MAX_BONUSLEN, tx);
- rc = -sa_buf_hold(uos->os, oid, tag, dbp);
+ rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, dbp);
LASSERTF(rc == 0, "sa_buf_hold "LPU64" failed: %d\n", oid, rc);
LASSERT(la->la_valid & LA_MODE);
la->la_size = 0;
la->la_nlink = 1;
- rc = __osd_attr_init(env, uos, oid, tx, la, parent);
+ rc = __osd_attr_init(env, osd, oid, tx, la, parent);
if (rc != 0) {
- sa_buf_rele(*dbp, tag);
+ sa_buf_rele(*dbp, osd_obj_tag);
*dbp = NULL;
- dmu_object_free(uos->os, oid, tx);
+ dmu_object_free(osd->od_os, oid, tx);
return rc;
}
- spin_lock(&uos->lock);
- uos->objects++;
- spin_unlock(&uos->lock);
return 0;
}
* will also require a FAT ZAP. If there is a new type of micro ZAP created
* then we might need to re-evaluate the use of this flag and instead do
* a conversion from the different internal ZAP hash formats being used. */
-int __osd_zap_create(const struct lu_env *env, udmu_objset_t *uos,
+int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
dmu_buf_t **zap_dbp, dmu_tx_t *tx,
- struct lu_attr *la, uint64_t parent,
- void *tag, zap_flags_t flags)
+ struct lu_attr *la, uint64_t parent, zap_flags_t flags)
{
uint64_t oid;
int rc;
- LASSERT(tag);
-
- spin_lock(&uos->lock);
- uos->objects++;
- spin_unlock(&uos->lock);
-
/* Assert that the transaction has been assigned to a
transaction group. */
LASSERT(tx->tx_txg != 0);
- oid = zap_create_flags(uos->os, 0, flags | ZAP_FLAG_HASH64,
+ oid = zap_create_flags(osd->od_os, 0, flags | ZAP_FLAG_HASH64,
DMU_OT_DIRECTORY_CONTENTS,
14, /* == ZFS fzap_default_block_shift */
DN_MAX_INDBLKSHIFT, /* indirect block shift */
DMU_OT_SA, DN_MAX_BONUSLEN, tx);
- rc = -sa_buf_hold(uos->os, oid, tag, zap_dbp);
+ rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, zap_dbp);
if (rc)
return rc;
la->la_size = 2;
la->la_nlink = 1;
- return __osd_attr_init(env, uos, oid, tx, la, parent);
+ return __osd_attr_init(env, osd, oid, tx, la, parent);
}
static dmu_buf_t *osd_mkidx(const struct lu_env *env, struct osd_device *osd,
* We set ZAP_FLAG_UINT64_KEY to let ZFS know than we are going to use
* binary keys */
LASSERT(S_ISREG(la->la_mode));
- rc = __osd_zap_create(env, &osd->od_objset, &db, oh->ot_tx, la,
- parent, osd_obj_tag, ZAP_FLAG_UINT64_KEY);
+ rc = __osd_zap_create(env, osd, &db, oh->ot_tx, la, parent,
+ ZAP_FLAG_UINT64_KEY);
if (rc)
return ERR_PTR(rc);
return db;
int rc;
LASSERT(S_ISDIR(la->la_mode));
- rc = __osd_zap_create(env, &osd->od_objset, &db, oh->ot_tx, la,
- parent, osd_obj_tag, 0);
+ rc = __osd_zap_create(env, osd, &db, oh->ot_tx, la, parent, 0);
if (rc)
return ERR_PTR(rc);
return db;
int rc;
LASSERT(S_ISREG(la->la_mode));
- rc = __osd_object_create(env, &osd->od_objset, &db, oh->ot_tx, la,
- parent, osd_obj_tag);
+ rc = __osd_object_create(env, osd, &db, oh->ot_tx, la, parent);
if (rc)
return ERR_PTR(rc);
* a method in OSD API to control this from OFD/MDD
*/
if (!lu_device_is_md(osd2lu_dev(osd))) {
- rc = -dmu_object_set_blocksize(osd->od_objset.os,
+ rc = -dmu_object_set_blocksize(osd->od_os,
db->db_object,
128 << 10, 0, oh->ot_tx);
if (unlikely(rc)) {
int rc;
LASSERT(S_ISLNK(la->la_mode));
- rc = __osd_object_create(env, &osd->od_objset, &db, oh->ot_tx, la,
- parent, osd_obj_tag);
+ rc = __osd_object_create(env, osd, &db, oh->ot_tx, la, parent);
if (rc)
return ERR_PTR(rc);
return db;
if (S_ISCHR(la->la_mode) || S_ISBLK(la->la_mode))
la->la_valid |= LA_RDEV;
- rc = __osd_object_create(env, &osd->od_objset, &db, oh->ot_tx, la,
- parent, osd_obj_tag);
+ rc = __osd_object_create(env, osd, &db, oh->ot_tx, la, parent);
if (rc)
return ERR_PTR(rc);
return db;
zapid = osd_get_name_n_idx(env, osd, fid, buf);
- rc = -zap_add(osd->od_objset.os, zapid, buf, 8, 1, zde, oh->ot_tx);
+ rc = -zap_add(osd->od_os, zapid, buf, 8, 1, zde, oh->ot_tx);
if (rc)
GOTO(out, rc);
/* Add new object to inode accounting.
* Errors are not considered as fatal */
- rc = -zap_increment_int(osd->od_objset.os, osd->od_iusr_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
(attr->la_valid & LA_UID) ? attr->la_uid : 0, 1,
oh->ot_tx);
if (rc)
CERROR("%s: failed to add "DFID" to accounting ZAP for usr %d "
"(%d)\n", osd->od_svname, PFID(fid), attr->la_uid, rc);
- rc = -zap_increment_int(osd->od_objset.os, osd->od_igrp_oid,
+ rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
(attr->la_valid & LA_GID) ? attr->la_gid : 0, 1,
oh->ot_tx);
if (rc)
struct osd_object *obj = osd_dt_obj(dt);
struct osd_thandle *oh;
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
uint64_t nlink;
int rc;
nlink = ++obj->oo_attr.la_nlink;
write_unlock(&obj->oo_attr_lock);
- rc = osd_object_sa_update(obj, SA_ZPL_LINKS(uos), &nlink, 8, oh);
+ rc = osd_object_sa_update(obj, SA_ZPL_LINKS(osd), &nlink, 8, oh);
return rc;
}
struct osd_object *obj = osd_dt_obj(dt);
struct osd_thandle *oh;
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
uint64_t nlink;
int rc;
nlink = --obj->oo_attr.la_nlink;
write_unlock(&obj->oo_attr_lock);
- rc = osd_object_sa_update(obj, SA_ZPL_LINKS(uos), &nlink, 8, oh);
+ rc = osd_object_sa_update(obj, SA_ZPL_LINKS(osd), &nlink, 8, oh);
return rc;
}
* support ZIL. If the object tracked the txg that it was last
* modified in, it could pass that txg here instead of "0". Maybe
* the changes are already committed, so no wait is needed at all? */
- txg_wait_synced(dmu_objset_pool(osd->od_objset.os), 0ULL);
+ txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL);
RETURN(0);
}
#include <sys/sa_impl.h>
#include <sys/txg.h>
-static char *oi_tag = "osd_mount, oi";
-
#define OSD_OI_FID_NR (1UL << 7)
#define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX)
unsigned int osd_oi_count = OSD_OI_FID_NR;
struct zpl_direntry *zde = &osd_oti_get(env)->oti_zde.lzd_reg;
int rc;
- rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde);
+ rc = -zap_lookup(o->od_os, parent, name, 8, 1, (void *)zde);
if (rc)
return rc;
int rc;
/* verify it doesn't already exist */
- rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde);
+ rc = -zap_lookup(o->od_os, parent, name, 8, 1, (void *)zde);
if (rc == 0)
return -EEXIST;
/* create fid-to-dnode index */
- tx = dmu_tx_create(o->od_objset.os);
+ tx = dmu_tx_create(o->od_os);
if (tx == NULL)
return -ENOMEM;
la->la_valid = LA_MODE | LA_UID | LA_GID;
la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
la->la_uid = la->la_gid = 0;
- __osd_zap_create(env, &o->od_objset, &db, tx, la, parent, oi_tag, 0);
+ __osd_zap_create(env, o, &db, tx, la, parent, 0);
zde->zde_dnode = db->db_object;
zde->zde_pad = 0;
zde->zde_type = IFTODT(S_IFDIR);
- rc = -zap_add(o->od_objset.os, parent, name, 8, 1, (void *)zde, tx);
+ rc = -zap_add(o->od_os, parent, name, 8, 1, (void *)zde, tx);
dmu_tx_commit(tx);
*child = db->db_object;
- sa_buf_rele(db, oi_tag);
+ sa_buf_rele(db, osd_obj_tag);
return rc;
}
} else {
zapid = osd_get_name_n_idx(env, dev, fid, buf);
- rc = -zap_lookup(dev->od_objset.os, zapid, buf,
+ rc = -zap_lookup(dev->od_os, zapid, buf,
8, 1, &info->oti_zde);
if (rc)
RETURN(rc);
}
if (rc == 0)
- dmu_prefetch(dev->od_objset.os, *oid, 0, 0);
+ dmu_prefetch(dev->od_os, *oid, 0, 0);
RETURN(rc);
}
RETURN(0);
/* lookup /ROOT */
- rc = -zap_lookup(o->od_objset.os, o->od_root, root2convert, 8,
+ rc = -zap_lookup(o->od_os, o->od_root, root2convert, 8,
sizeof(*lze) / 8, (void *)lze);
/* doesn't exist or let actual user to handle the error */
if (rc)
if (fid_seq(&lze->lzd_fid) == FID_SEQ_ROOT)
return 0;
- tx = dmu_tx_create(o->od_objset.os);
+ tx = dmu_tx_create(o->od_os);
if (tx == NULL)
return -ENOMEM;
if (rc)
GOTO(err, rc);
- rc = -zap_remove(o->od_objset.os, o->od_root, root2convert, tx);
+ rc = -zap_remove(o->od_os, o->od_root, root2convert, tx);
if (rc)
GOTO(err, rc);
/* remove from OI */
zapid = osd_get_name_n_idx(env, o, &lze->lzd_fid, buf);
- rc = -zap_remove(o->od_objset.os, zapid, buf, tx);
+ rc = -zap_remove(o->od_os, zapid, buf, tx);
if (rc)
GOTO(err, rc);
lze->lzd_fid = newfid;
- rc = -zap_add(o->od_objset.os, o->od_root, root2convert,
+ rc = -zap_add(o->od_os, o->od_root, root2convert,
8, sizeof(*lze) / 8, (void *)lze, tx);
if (rc)
GOTO(err, rc);
/* add to OI with the new fid */
zapid = osd_get_name_n_idx(env, o, &newfid, buf);
- rc = -zap_add(o->od_objset.os, zapid, buf, 8, 1, &lze->lzd_reg, tx);
+ rc = -zap_add(o->od_os, zapid, buf, 8, 1, &lze->lzd_reg, tx);
if (rc)
GOTO(err, rc);
}
/**
+ * Helper function to estimate the number of inodes in use for a given uid/gid
+ * from the block usage
+ */
+static uint64_t osd_objset_user_iused(struct osd_device *osd, uint64_t uidbytes)
+{
+	uint64_t refdbytes, availbytes, usedobjs, availobjs;
+	uint64_t estimate;
+
+	/* query the objset for its current space and object counters */
+	dmu_objset_space(osd->od_os, &refdbytes, &availbytes,
+			 &usedobjs, &availobjs);
+
+	/* derive an object count from the id's usage, expressed in
+	 * SPA_MAXBLOCKSHIFT-sized blocks */
+	estimate = osd_objs_count_estimate(refdbytes, usedobjs,
+					   uidbytes >> SPA_MAXBLOCKSHIFT);
+
+	/* any non-zero byte usage implies that at least one dnode exists */
+	if (uidbytes > 0 && estimate < 1)
+		estimate = 1;
+
+	return estimate;
+}
+
+/**
* Space Accounting Management
*/
* not associated with any dmu_but_t (see dnode_special_open()).
* As a consequence, we cannot use udmu_zap_lookup() here since it
* requires a valid oo_db. */
- rc = -zap_lookup(osd->od_objset.os, oid, buf, sizeof(uint64_t), 1,
+ rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1,
&rec->bspace);
if (rc == -ENOENT)
/* user/group has not created anything yet */
if (osd->od_quota_iused_est) {
if (rec->bspace != 0)
/* estimate #inodes in use */
- rec->ispace = udmu_objset_user_iused(&osd->od_objset,
- rec->bspace);
+ rec->ispace = osd_objset_user_iused(osd, rec->bspace);
RETURN(+1);
}
/* as for inode accounting, it is not maintained by DMU, so we just
* use our own ZAP to track inode usage */
- rc = -zap_lookup(osd->od_objset.os, obj->oo_db->db_object,
+ rc = -zap_lookup(osd->od_os, obj->oo_db->db_object,
buf, sizeof(uint64_t), 1, &rec->ispace);
if (rc == -ENOENT)
/* user/group has not created any file yet */
it->oiq_oid = osd_quota_fid2dmu(lu_object_fid(lo));
/* initialize zap cursor */
- rc = -udmu_zap_cursor_init(&it->oiq_zc, &osd->od_objset, it->oiq_oid,0);
+ rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0);
if (rc)
RETURN(ERR_PTR(rc));
{
struct osd_it_quota *it = (struct osd_it_quota *)di;
ENTRY;
- udmu_zap_cursor_fini(it->oiq_zc);
+ osd_zap_cursor_fini(it->oiq_zc);
lu_object_put(env, &it->oiq_obj->oo_dt.do_lu);
EXIT;
}
static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
{
struct osd_it_quota *it = (struct osd_it_quota *)di;
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
int rc;
ENTRY;
if (it->oiq_reset == 0)
zap_cursor_advance(it->oiq_zc);
it->oiq_reset = 0;
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
+ rc = -zap_cursor_retrieve(it->oiq_zc, za);
if (rc == -ENOENT) /* reached the end */
- RETURN(+1);
+ rc = 1;
RETURN(rc);
}
const struct dt_it *di)
{
struct osd_it_quota *it = (struct osd_it_quota *)di;
- struct osd_thread_info *info = osd_oti_get(env);
- char *buf = info->oti_buf;
- char *p;
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
int rc;
ENTRY;
it->oiq_reset = 0;
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
+ rc = -zap_cursor_retrieve(it->oiq_zc, za);
if (rc)
RETURN(ERR_PTR(rc));
- it->oiq_id = simple_strtoull(buf, &p, 16);
+ rc = kstrtoull(za->za_name, 16, &it->oiq_id);
+
RETURN((struct dt_key *) &it->oiq_id);
}
RETURN((int)sizeof(uint64_t));
}
+/*
+ * Read the value of the ZAP entry the cursor currently points at.
+ *
+ * zap_cursor_retrieve() only describes the current record (name, integer
+ * length and integer count) in the per-thread zap_attribute_t, so the
+ * value bytes themselves are fetched with an explicit zap_lookup() on
+ * that name.
+ *
+ * When the stored value is larger than buf_size, only as many whole
+ * integers as fit into buf are requested and *bytes_read is capped at
+ * buf_size.
+ *
+ * Returns 0 on success (and sets *bytes_read), -ERANGE when the record
+ * reports a non-positive integer length, or the negated error code of
+ * zap_cursor_retrieve()/zap_lookup(). *bytes_read is left untouched on
+ * failure.
+ */
+static int osd_zap_cursor_retrieve_value(const struct lu_env *env,
+					 zap_cursor_t *zc, char *buf,
+					 int buf_size, int *bytes_read)
+{
+	zap_attribute_t *za = &osd_oti_get(env)->oti_za;
+	int rc, actual_size;
+
+	rc = -zap_cursor_retrieve(zc, za);
+	if (unlikely(rc != 0))
+		/* rc has already been negated above; negating it again
+		 * would hand a positive errno to callers that propagate
+		 * this value unchanged */
+		return rc;
+
+	if (unlikely(za->za_integer_length <= 0))
+		return -ERANGE;
+
+	actual_size = za->za_integer_length * za->za_num_integers;
+
+	/* from here on buf_size is reused as the number of integers to
+	 * ask zap_lookup() for, not a byte count */
+	if (actual_size > buf_size) {
+		actual_size = buf_size;
+		buf_size = actual_size / za->za_integer_length;
+	} else {
+		buf_size = za->za_num_integers;
+	}
+
+	rc = -zap_lookup(zc->zc_objset, zc->zc_zapobj,
+			 za->za_name, za->za_integer_length,
+			 buf_size, buf);
+
+	if (likely(rc == 0))
+		*bytes_read = actual_size;
+
+	return rc;
+}
+
+
/**
* Return pointer to the record under iterator.
*
struct dt_rec *dtrec, __u32 attr)
{
struct osd_thread_info *info = osd_oti_get(env);
- char *buf = info->oti_buf;
+ zap_attribute_t *za = &info->oti_za;
struct osd_it_quota *it = (struct osd_it_quota *)di;
struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
struct osd_object *obj = it->oiq_obj;
rec->ispace = rec->bspace = 0;
/* retrieve block usage from the DMU accounting object */
- rc = -udmu_zap_cursor_retrieve_value(env, it->oiq_zc,
- (char *)&rec->bspace,
- sizeof(uint64_t), &bytes_read);
+ rc = osd_zap_cursor_retrieve_value(env, it->oiq_zc,
+ (char *)&rec->bspace,
+ sizeof(uint64_t), &bytes_read);
if (rc)
RETURN(rc);
if (osd->od_quota_iused_est) {
if (rec->bspace != 0)
/* estimate #inodes in use */
- rec->ispace = udmu_objset_user_iused(&osd->od_objset,
- rec->bspace);
+ rec->ispace = osd_objset_user_iused(osd, rec->bspace);
RETURN(0);
}
/* retrieve key associated with the current cursor */
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, buf, 32);
- if (rc)
+ rc = -zap_cursor_retrieve(it->oiq_zc, za);
+ if (unlikely(rc != 0))
RETURN(rc);
/* inode accounting is not maintained by DMU, so we use our own ZAP to
* track inode usage */
- rc = -zap_lookup(osd->od_objset.os, it->oiq_obj->oo_db->db_object,
- buf, sizeof(uint64_t), 1, &rec->ispace);
+ rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_db->db_object,
+ za->za_name, sizeof(uint64_t), 1, &rec->ispace);
if (rc == -ENOENT)
/* user/group has not created any file yet */
CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
- osd->od_svname, buf);
+ osd->od_svname, za->za_name);
else if (rc)
RETURN(rc);
struct osd_it_quota *it = (struct osd_it_quota *)di;
ENTRY;
it->oiq_reset = 0;
- RETURN(udmu_zap_cursor_serialize(it->oiq_zc));
+ RETURN(osd_zap_cursor_serialize(it->oiq_zc));
}
/**
{
struct osd_it_quota *it = (struct osd_it_quota *)di;
struct osd_device *osd = osd_obj2dev(it->oiq_obj);
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
zap_cursor_t *zc;
int rc;
ENTRY;
/* create new cursor pointing to the new hash */
- rc = -udmu_zap_cursor_init(&zc, &osd->od_objset, it->oiq_oid, hash);
+ rc = osd_zap_cursor_init(&zc, osd->od_os, it->oiq_oid, hash);
if (rc)
RETURN(rc);
- udmu_zap_cursor_fini(it->oiq_zc);
+ osd_zap_cursor_fini(it->oiq_zc);
it->oiq_zc = zc;
it->oiq_reset = 0;
- rc = -udmu_zap_cursor_retrieve_key(env, it->oiq_zc, NULL, 32);
+ rc = -zap_cursor_retrieve(it->oiq_zc, za);
if (rc == 0)
- RETURN(+1);
+ rc = 1;
else if (rc == -ENOENT)
- RETURN(0);
+ rc = 0;
+
RETURN(rc);
}
*
* No locking is done here.
*/
-int __osd_xattr_load(udmu_objset_t *uos, uint64_t dnode, nvlist_t **sa_xattr)
+int __osd_xattr_load(struct osd_device *osd, uint64_t dnode, nvlist_t **sa)
{
sa_handle_t *sa_hdl;
char *buf;
if (unlikely(dnode == ZFS_NO_OBJECT))
return -ENOENT;
- rc = -sa_handle_get(uos->os, dnode, NULL, SA_HDL_PRIVATE, &sa_hdl);
+ rc = -sa_handle_get(osd->od_os, dnode, NULL, SA_HDL_PRIVATE, &sa_hdl);
if (rc)
return rc;
- rc = -sa_size(sa_hdl, SA_ZPL_DXATTR(uos), &size);
+ rc = -sa_size(sa_hdl, SA_ZPL_DXATTR(osd), &size);
if (rc) {
if (rc == -ENOENT)
- rc = -nvlist_alloc(sa_xattr, NV_UNIQUE_NAME, KM_SLEEP);
+ rc = -nvlist_alloc(sa, NV_UNIQUE_NAME, KM_SLEEP);
goto out_sa;
}
rc = -ENOMEM;
goto out_sa;
}
- rc = -sa_lookup(sa_hdl, SA_ZPL_DXATTR(uos), buf, size);
+ rc = -sa_lookup(sa_hdl, SA_ZPL_DXATTR(osd), buf, size);
if (rc == 0)
- rc = -nvlist_unpack(buf, size, sa_xattr, KM_SLEEP);
+ rc = -nvlist_unpack(buf, size, sa, KM_SLEEP);
sa_spill_free(buf);
out_sa:
sa_handle_destroy(sa_hdl);
LASSERT(obj->oo_sa_xattr == NULL);
LASSERT(obj->oo_db != NULL);
- return __osd_xattr_load(&osd_obj2dev(obj)->od_objset,
- obj->oo_db->db_object, &obj->oo_sa_xattr);
+ return __osd_xattr_load(osd_obj2dev(obj), obj->oo_db->db_object,
+ &obj->oo_sa_xattr);
}
int __osd_sa_xattr_get(const struct lu_env *env, struct osd_object *obj,
- const struct lu_buf *buf, const char *name, int *sizep)
+ const struct lu_buf *buf, const char *name, int *sizep)
{
uchar_t *nv_value;
int rc;
return 0;
}
-int __osd_xattr_get_large(const struct lu_env *env, udmu_objset_t *uos,
+int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
uint64_t xattr, struct lu_buf *buf,
const char *name, int *sizep)
{
return -ENOENT;
/* Lookup the object number containing the xattr data */
- rc = -zap_lookup(uos->os, xattr, name, sizeof(uint64_t), 1,
+ rc = -zap_lookup(osd->od_os, xattr, name, sizeof(uint64_t), 1,
&xa_data_obj);
if (rc)
return rc;
- rc = __osd_obj2dbuf(env, uos->os, xa_data_obj, &xa_data_db, FTAG);
+ rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj, &xa_data_db);
if (rc)
return rc;
- rc = -sa_handle_get(uos->os, xa_data_obj, NULL, SA_HDL_PRIVATE,
+ rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE,
&sa_hdl);
if (rc)
goto out_rele;
/* Get the xattr value length / object size */
- rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(uos), &size, 8);
+ rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
if (rc)
goto out;
goto out;
}
- rc = -dmu_read(uos->os, xa_data_db->db_object, 0,
+ rc = -dmu_read(osd->od_os, xa_data_db->db_object, 0,
size, buf->lb_buf, DMU_READ_PREFETCH);
out:
}
int __osd_xattr_get(const struct lu_env *env, struct osd_object *obj,
- struct lu_buf *buf, const char *name, int *sizep)
+ struct lu_buf *buf, const char *name, int *sizep)
{
int rc;
if (rc != -ENOENT)
return rc;
- rc = __osd_xattr_get_large(env, &osd_obj2dev(obj)->od_objset,
- obj->oo_xattr, buf, name, sizep);
+ rc = __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
+ buf, name, sizep);
return rc;
}
int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
- struct lu_buf *buf, const char *name,
- struct lustre_capa *capa)
+ struct lu_buf *buf, const char *name,
+ struct lustre_capa *capa)
{
struct osd_object *obj = osd_dt_obj(dt);
int rc, size = 0;
}
void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj,
- int vallen, const char *name, struct osd_thandle *oh)
+ int vallen, const char *name,
+ struct osd_thandle *oh)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
dmu_buf_t *db = obj->oo_db;
dmu_tx_t *tx = oh->ot_tx;
uint64_t xa_data_obj;
return;
}
- rc = -zap_lookup(uos->os, obj->oo_xattr, name, sizeof(uint64_t), 1,
+ rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
&xa_data_obj);
if (rc == 0) {
/*
}
int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
- const struct lu_buf *buf, const char *name,
- int fl, struct thandle *handle)
+ const struct lu_buf *buf, const char *name,
+ int fl, struct thandle *handle)
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_thandle *oh;
*/
static int
__osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
- struct osd_thandle *oh)
+ struct osd_thandle *oh)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
char *dxattr;
size_t sa_size;
int rc;
if (rc)
GOTO(out_free, rc);
- rc = osd_object_sa_update(obj, SA_ZPL_DXATTR(uos), dxattr, sa_size, oh);
+ rc = osd_object_sa_update(obj, SA_ZPL_DXATTR(osd), dxattr, sa_size, oh);
out_free:
sa_spill_free(dxattr);
RETURN(rc);
}
int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
- const struct lu_buf *buf, const char *name, int fl,
- struct osd_thandle *oh)
+ const struct lu_buf *buf, const char *name, int fl,
+ struct osd_thandle *oh)
{
uchar_t *nv_value;
size_t size;
/* Ensure xattr doesn't exist in ZAP */
if (obj->oo_xattr != ZFS_NO_OBJECT) {
- udmu_objset_t *uos = &osd_obj2dev(obj)->od_objset;
- uint64_t xa_data_obj;
- rc = -zap_lookup(uos->os, obj->oo_xattr,
- name, 8, 1, &xa_data_obj);
+ struct osd_device *osd = osd_obj2dev(obj);
+ uint64_t objid;
+ rc = -zap_lookup(osd->od_os, obj->oo_xattr,
+ name, 8, 1, &objid);
if (rc == 0) {
- rc = __osd_object_free(uos, xa_data_obj, oh->ot_tx);
+ rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
if (rc == 0)
- zap_remove(uos->os, obj->oo_xattr,
+ zap_remove(osd->od_os, obj->oo_xattr,
name, oh->ot_tx);
}
}
struct osd_thandle *oh)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
dmu_buf_t *xa_zap_db = NULL;
dmu_buf_t *xa_data_db = NULL;
uint64_t xa_data_obj;
la->la_valid = LA_MODE;
la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
- rc = __osd_zap_create(env, uos, &xa_zap_db, tx, la,
- obj->oo_db->db_object, FTAG, 0);
+ rc = __osd_zap_create(env, osd, &xa_zap_db, tx, la,
+ obj->oo_db->db_object, 0);
if (rc)
return rc;
obj->oo_xattr = xa_zap_db->db_object;
- rc = osd_object_sa_update(obj, SA_ZPL_XATTR(uos),
+ rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
&obj->oo_xattr, 8, oh);
if (rc)
goto out;
}
- rc = -zap_lookup(uos->os, obj->oo_xattr, name, sizeof(uint64_t), 1,
- &xa_data_obj);
+ rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
+ &xa_data_obj);
if (rc == 0) {
if (fl & LU_XATTR_CREATE) {
rc = -EEXIST;
* Entry already exists.
* We'll truncate the existing object.
*/
- rc = __osd_obj2dbuf(env, uos->os, xa_data_obj,
- &xa_data_db, FTAG);
+ rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj,
+ &xa_data_db);
if (rc)
goto out;
- rc = -sa_handle_get(uos->os, xa_data_obj, NULL,
+ rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
SA_HDL_PRIVATE, &sa_hdl);
if (rc)
goto out;
- rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(uos), &size, 8);
+ rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
if (rc)
goto out_sa;
- rc = -dmu_free_range(uos->os, xa_data_db->db_object,
+ rc = -dmu_free_range(osd->od_os, xa_data_db->db_object,
0, DMU_OBJECT_END, tx);
if (rc)
goto out_sa;
la->la_valid = LA_MODE;
la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
- rc = __osd_object_create(env, uos, &xa_data_db, tx, la,
- obj->oo_xattr, FTAG);
+ rc = __osd_object_create(env, osd, &xa_data_db, tx, la,
+ obj->oo_xattr);
if (rc)
goto out;
xa_data_obj = xa_data_db->db_object;
- rc = -sa_handle_get(uos->os, xa_data_obj, NULL,
+ rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
SA_HDL_PRIVATE, &sa_hdl);
if (rc)
goto out;
- rc = -zap_add(uos->os, obj->oo_xattr, name, sizeof(uint64_t),
+ rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
1, &xa_data_obj, tx);
if (rc)
goto out_sa;
}
/* Finally write the xattr value */
- dmu_write(uos->os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
+ dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
size = buf->lb_len;
- rc = -sa_update(sa_hdl, SA_ZPL_SIZE(uos), &size, 8, tx);
+ rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);
out_sa:
sa_handle_destroy(sa_hdl);
const char *name, struct osd_thandle *oh)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
dmu_tx_t *tx = oh->ot_tx;
uint64_t xa_data_obj;
int rc;
if (obj->oo_xattr == ZFS_NO_OBJECT)
return;
- rc = -zap_lookup(uos->os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
+ rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
if (rc == 0) {
/*
* Entry exists.
}
int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt,
- const char *name, struct thandle *handle)
+ const char *name, struct thandle *handle)
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_thandle *oh;
}
int __osd_sa_xattr_del(const struct lu_env *env, struct osd_object *obj,
- const char *name, struct osd_thandle *oh)
+ const char *name, struct osd_thandle *oh)
{
int rc;
}
int __osd_xattr_del(const struct lu_env *env, struct osd_object *obj,
- const char *name, struct osd_thandle *oh)
+ const char *name, struct osd_thandle *oh)
{
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
uint64_t xa_data_obj;
int rc;
if (obj->oo_xattr == ZFS_NO_OBJECT)
return 0;
- rc = -zap_lookup(uos->os, obj->oo_xattr, name, sizeof(uint64_t), 1,
+ rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
&xa_data_obj);
if (rc == -ENOENT) {
rc = 0;
* Entry exists.
* We'll delete the existing object and ZAP entry.
*/
- rc = __osd_object_free(uos, xa_data_obj, oh->ot_tx);
+ rc = -dmu_object_free(osd->od_os, xa_data_obj, oh->ot_tx);
if (rc)
return rc;
- rc = -zap_remove(uos->os, obj->oo_xattr, name, oh->ot_tx);
+ rc = -zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx);
}
return rc;
}
int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
- const char *name, struct thandle *handle,
- struct lustre_capa *capa)
+ const char *name, struct thandle *handle,
+ struct lustre_capa *capa)
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_thandle *oh;
static int
osd_sa_xattr_list(const struct lu_env *env, struct osd_object *obj,
- struct lu_buf *lb)
+ struct lu_buf *lb)
{
nvpair_t *nvp = NULL;
int len, counted = 0, remain = lb->lb_len;
}
int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
- struct lu_buf *lb, struct lustre_capa *capa)
+ struct lu_buf *lb, struct lustre_capa *capa)
{
- struct osd_thread_info *oti = osd_oti_get(env);
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
+ zap_attribute_t *za = &osd_oti_get(env)->oti_za;
zap_cursor_t *zc;
int rc, counted = 0, remain = lb->lb_len;
ENTRY;
if (obj->oo_xattr == ZFS_NO_OBJECT)
GOTO(out, rc = counted);
- rc = -udmu_zap_cursor_init(&zc, uos, obj->oo_xattr, 0);
+ rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
if (rc)
GOTO(out, rc);
- while ((rc = -udmu_zap_cursor_retrieve_key(env, zc, oti->oti_key,
- MAXNAMELEN)) == 0) {
+ while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
if (!osd_obj2dev(obj)->od_posix_acl &&
- (strcmp(oti->oti_key, POSIX_ACL_XATTR_ACCESS) == 0 ||
- strcmp(oti->oti_key, POSIX_ACL_XATTR_DEFAULT) == 0)) {
+ (strcmp(za->za_name, POSIX_ACL_XATTR_ACCESS) == 0 ||
+ strcmp(za->za_name, POSIX_ACL_XATTR_DEFAULT) == 0)) {
zap_cursor_advance(zc);
continue;
}
- rc = strlen(oti->oti_key);
+ rc = strlen(za->za_name);
if (lb->lb_buf != NULL) {
if (rc + 1 > remain)
RETURN(-ERANGE);
- memcpy(lb->lb_buf, oti->oti_key, rc);
+ memcpy(lb->lb_buf, za->za_name, rc);
lb->lb_buf += rc;
*((char *)lb->lb_buf) = '\0';
lb->lb_buf++;
rc = counted;
out_fini:
- udmu_zap_cursor_fini(zc);
+ osd_zap_cursor_fini(zc);
out:
up(&obj->oo_guard);
RETURN(rc);
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2012, Intel Corporation.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/osd-zfs/udmu.c
- * Module that interacts with the ZFS DMU and provides an abstraction
- * to the rest of Lustre.
- *
- * Author: Alex Zhuravlev <bzzz@whamcloud.com>
- * Author: Atul Vidwansa <atul.vidwansa@sun.com>
- * Author: Manoj Joseph <manoj.joseph@sun.com>
- * Author: Mike Pershin <tappro@whamcloud.com>
- */
-
-#include <lustre/lustre_idl.h> /* OBD_OBJECT_EOF */
-#include <lustre/lustre_user.h> /* struct obd_statfs */
-
-#include <sys/dnode.h>
-#include <sys/dbuf.h>
-#include <sys/spa.h>
-#include <sys/stat.h>
-#include <sys/zap.h>
-#include <sys/spa_impl.h>
-#include <sys/zfs_znode.h>
-#include <sys/dmu_tx.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_prop.h>
-#include <sys/sa_impl.h>
-#include <sys/txg.h>
-
-#include "udmu.h"
-
-int udmu_blk_insert_cost(void)
-{
- int max_blockshift, nr_blkptrshift;
-
- /* max_blockshift is the log2 of the number of blocks needed to reach
- * the maximum filesize (that's to say 2^64) */
- max_blockshift = DN_MAX_OFFSET_SHIFT - SPA_MAXBLOCKSHIFT;
-
- /* nr_blkptrshift is the log2 of the number of block pointers that can
- * be stored in an indirect block */
- CLASSERT(DN_MAX_INDBLKSHIFT > SPA_BLKPTRSHIFT);
- nr_blkptrshift = DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT;
-
- /* max_blockshift / nr_blkptrshift is thus the maximum depth of the
- * tree. We add +1 for rounding purpose.
- * The tree depth times the indirect block size gives us the maximum
- * cost of inserting a block in the tree */
- return (max_blockshift / nr_blkptrshift + 1) * (1 << DN_MAX_INDBLKSHIFT);
-}
-
-int udmu_objset_open(char *osname, udmu_objset_t *uos)
-{
- uint64_t refdbytes, availbytes, usedobjs, availobjs;
- uint64_t version = ZPL_VERSION;
- uint64_t sa_obj;
- int error;
-
- memset(uos, 0, sizeof(udmu_objset_t));
-
- error = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, uos, &uos->os);
- if (error) {
- uos->os = NULL;
- goto out;
- }
-
- /* Check ZFS version */
- error = zap_lookup(uos->os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
- &version);
- if (error) {
- CERROR("%s: Error looking up ZPL VERSION\n", osname);
- /*
- * We can't return ENOENT because that would mean the objset
- * didn't exist.
- */
- error = EIO;
- goto out;
- }
-
- error = zap_lookup(uos->os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
- &sa_obj);
- if (error)
- goto out;
-
- error = sa_setup(uos->os, sa_obj, zfs_attr_table, ZPL_END,
- &uos->z_attr_table);
- if (error)
- goto out;
-
- error = zap_lookup(uos->os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
- &uos->root);
- if (error) {
- CERROR("%s: Error looking up ZFS root object.\n", osname);
- error = EIO;
- goto out;
- }
- ASSERT(uos->root != 0);
-
- /* Check that user/group usage tracking is supported */
- if (!dmu_objset_userused_enabled(uos->os) ||
- DMU_USERUSED_DNODE(uos->os)->dn_type != DMU_OT_USERGROUP_USED ||
- DMU_GROUPUSED_DNODE(uos->os)->dn_type != DMU_OT_USERGROUP_USED) {
- CERROR("%s: Space accounting not supported by this target, "
- "aborting\n", osname);
- error = ENOTSUPP;
- goto out;
- }
-
- /*
- * as DMU doesn't maintain f_files absolutely actual (it's updated
- * at flush, not when object is create/destroyed) we've implemented
- * own counter which is initialized from on-disk at mount, then is
- * being maintained by DMU OSD
- */
- dmu_objset_space(uos->os, &refdbytes, &availbytes, &usedobjs,
- &availobjs);
- uos->objects = usedobjs;
- spin_lock_init(&uos->lock);
-
-out:
- if (error && uos->os != NULL)
- dmu_objset_disown(uos->os, uos);
-
- return error;
-}
-
-void udmu_objset_close(udmu_objset_t *uos)
-{
- ASSERT(uos->os != NULL);
-
- /*
- * Force a txg sync. This should not be needed, neither for
- * correctness nor safety. Presumably, we are only doing
- * this to force commit callbacks to be called sooner.
- */
- txg_wait_synced(dmu_objset_pool(uos->os), 0ULL);
-
- /* close the object set */
- dmu_objset_disown(uos->os, uos);
-
- uos->os = NULL;
-}
-
-/* Estimate the number of objects from a number of blocks */
-static uint64_t udmu_objs_count_estimate(uint64_t refdbytes,
- uint64_t usedobjs,
- uint64_t nrblocks)
-{
- uint64_t est_objs, est_refdblocks, est_usedobjs;
-
- /* Compute an nrblocks estimate based on the actual number of
- * dnodes that could fit in the space. Since we don't know the
- * overhead associated with each dnode (xattrs, SAs, VDEV overhead,
- * etc) just using DNODE_SHIFT isn't going to give a good estimate.
- * Instead, compute an estimate based on the average space usage per
- * dnode, with an upper and lower cap.
- *
- * In case there aren't many dnodes or blocks used yet, add a small
- * correction factor using OSD_DNODE_EST_SHIFT. This correction
- * factor gradually disappears as the number of real dnodes grows.
- * This also avoids the need to check for divide-by-zero later.
- */
- CLASSERT(OSD_DNODE_MIN_BLKSHIFT > 0);
- CLASSERT(OSD_DNODE_EST_BLKSHIFT > 0);
-
- est_refdblocks = (refdbytes >> SPA_MAXBLOCKSHIFT) +
- (OSD_DNODE_EST_COUNT >> OSD_DNODE_EST_BLKSHIFT);
- est_usedobjs = usedobjs + OSD_DNODE_EST_COUNT;
-
- /* Average space/dnode more than maximum dnode size, use max dnode
- * size to estimate free dnodes from adjusted free blocks count.
- * OSTs typically use more than one block dnode so this case applies. */
- if (est_usedobjs <= est_refdblocks * 2) {
- est_objs = nrblocks;
-
- /* Average space/dnode smaller than min dnode size (probably due to
- * metadnode compression), use min dnode size to estimate the number of
- * objects.
- * An MDT typically uses below 512 bytes/dnode so this case applies. */
- } else if (est_usedobjs >= (est_refdblocks << OSD_DNODE_MIN_BLKSHIFT)) {
- est_objs = nrblocks << OSD_DNODE_MIN_BLKSHIFT;
-
- /* Between the extremes, we try to use the average size of
- * existing dnodes to compute the number of dnodes that fit
- * into nrblocks:
- *
- * est_objs = nrblocks * (est_usedobjs / est_refblocks);
- *
- * but this may overflow 64 bits or become 0 if not handled well
- *
- * We know nrblocks is below (64 - 17 = 47) bits from
- * SPA_MAXBLKSHIFT, and est_usedobjs is under 48 bits due to
- * DN_MAX_OBJECT_SHIFT, which means that multiplying them may
- * get as large as 2 ^ 95.
- *
- * We also know (est_usedobjs / est_refdblocks) is between 2 and
- * 256, due to above checks, so we can safely compute this first.
- * We care more about accuracy on the MDT (many dnodes/block)
- * which is good because this is where truncation errors are
- * smallest. This adds 8 bits to nrblocks so we can use 7 bits
- * to compute a fixed-point fraction and nrblocks can still fit
- * in 64 bits. */
- } else {
- unsigned dnodes_per_block = (est_usedobjs << 7)/est_refdblocks;
-
- est_objs = (nrblocks * dnodes_per_block) >> 7;
- }
- return est_objs;
-}
-
-int udmu_objset_statfs(udmu_objset_t *uos, struct obd_statfs *osfs)
-{
- uint64_t refdbytes, availbytes, usedobjs, availobjs;
- uint64_t est_availobjs;
- uint64_t reserved;
-
- dmu_objset_space(uos->os, &refdbytes, &availbytes, &usedobjs,
- &availobjs);
-
- /*
- * ZFS allows multiple block sizes. For statfs, Linux makes no
- * proper distinction between bsize and frsize. For calculations
- * of free and used blocks incorrectly uses bsize instead of frsize,
- * but bsize is also used as the optimal blocksize. We return the
- * largest possible block size as IO size for the optimum performance
- * and scale the free and used blocks count appropriately.
- */
- osfs->os_bsize = 1ULL << SPA_MAXBLOCKSHIFT;
-
- osfs->os_blocks = (refdbytes + availbytes) >> SPA_MAXBLOCKSHIFT;
- osfs->os_bfree = availbytes >> SPA_MAXBLOCKSHIFT;
- osfs->os_bavail = osfs->os_bfree; /* no extra root reservation */
-
- /* Take replication (i.e. number of copies) into account */
- osfs->os_bavail /= uos->os->os_copies;
-
- /*
- * Reserve some space so we don't run into ENOSPC due to grants not
- * accounting for metadata overhead in ZFS, and to avoid fragmentation.
- * Rather than report this via os_bavail (which makes users unhappy if
- * they can't fill the filesystem 100%), reduce os_blocks as well.
- *
- * Reserve 0.78% of total space, at least 4MB for small filesystems,
- * for internal files to be created/unlinked when space is tight.
- */
- CLASSERT(OSD_STATFS_RESERVED_BLKS > 0);
- if (likely(osfs->os_blocks >=
- OSD_STATFS_RESERVED_BLKS << OSD_STATFS_RESERVED_SHIFT))
- reserved = osfs->os_blocks >> OSD_STATFS_RESERVED_SHIFT;
- else
- reserved = OSD_STATFS_RESERVED_BLKS;
-
- osfs->os_blocks -= reserved;
- osfs->os_bfree -= MIN(reserved, osfs->os_bfree);
- osfs->os_bavail -= MIN(reserved, osfs->os_bavail);
-
- /*
- * The availobjs value returned from dmu_objset_space() is largely
- * useless, since it reports the number of objects that might
- * theoretically still fit into the dataset, independent of minor
- * issues like how much space is actually available in the pool.
- * Compute a better estimate in udmu_objs_count_estimate().
- */
- est_availobjs = udmu_objs_count_estimate(refdbytes, usedobjs,
- osfs->os_bfree);
-
- osfs->os_ffree = min(availobjs, est_availobjs);
- osfs->os_files = osfs->os_ffree + uos->objects;
-
- /* ZFS XXX: fill in backing dataset FSID/UUID
- memcpy(osfs->os_fsid, .... );*/
-
- /* We're a zfs filesystem. */
- osfs->os_type = UBERBLOCK_MAGIC;
-
- /* ZFS XXX: fill in appropriate OS_STATE_{DEGRADED,READONLY} flags
- osfs->os_state = vf_to_stf(vfsp->vfs_flag);
- if (sb->s_flags & MS_RDONLY)
- osfs->os_state = OS_STATE_READONLY;
- */
-
- osfs->os_namelen = MAXNAMELEN;
- osfs->os_maxbytes = OBD_OBJECT_EOF;
-
- return 0;
-}
-
-/**
- * Helper function to estimate the number of inodes in use for a give uid/gid
- * from the block usage
- */
-uint64_t udmu_objset_user_iused(udmu_objset_t *uos, uint64_t uidbytes)
-{
- uint64_t refdbytes, availbytes, usedobjs, availobjs;
- uint64_t uidobjs;
-
- /* get fresh statfs info */
- dmu_objset_space(uos->os, &refdbytes, &availbytes, &usedobjs,
- &availobjs);
-
- /* estimate the number of objects based on the disk usage */
- uidobjs = udmu_objs_count_estimate(refdbytes, usedobjs,
- uidbytes >> SPA_MAXBLOCKSHIFT);
- if (uidbytes > 0)
- /* if we have at least 1 byte, we have at least one dnode ... */
- uidobjs = max_t(uint64_t, uidobjs, 1);
- return uidobjs;
-}
-
-/* We don't actually have direct access to the zap_hashbits() function
- * so just pretend like we do for now. If this ever breaks we can look at
- * it at that time. */
-#define zap_hashbits(zc) 48
-/*
- * ZFS hash format:
- * | cd (16 bits) | hash (48 bits) |
- * we need it in other form:
- * |0| hash (48 bit) | cd (15 bit) |
- * to be a full 64-bit ordered hash so that Lustre readdir can use it to merge
- * the readdir hashes from multiple directory stripes uniformly on the client.
- * Another point is sign bit, the hash range should be in [0, 2^63-1] because
- * loff_t (for llseek) needs to be a positive value. This means the "cd" field
- * should only be the low 15 bits.
- */
-uint64_t udmu_zap_cursor_serialize(zap_cursor_t *zc)
-{
- uint64_t zfs_hash = zap_cursor_serialize(zc) & (~0ULL >> 1);
-
- return (zfs_hash >> zap_hashbits(zc)) |
- (zfs_hash << (63 - zap_hashbits(zc)));
-}
-
-void udmu_zap_cursor_init_serialized(zap_cursor_t *zc, udmu_objset_t *uos,
- uint64_t zapobj, uint64_t dirhash)
-{
- uint64_t zfs_hash = ((dirhash << zap_hashbits(zc)) & (~0ULL >> 1)) |
- (dirhash >> (63 - zap_hashbits(zc)));
- zap_cursor_init_serialized(zc, uos->os, zapobj, zfs_hash);
-}
-
-/*
- * Zap cursor APIs
- */
-int udmu_zap_cursor_init(zap_cursor_t **zc, udmu_objset_t *uos,
- uint64_t zapobj, uint64_t dirhash)
-{
- zap_cursor_t *t;
-
- t = kmem_alloc(sizeof(*t), KM_NOSLEEP);
- if (t) {
- udmu_zap_cursor_init_serialized(t, uos, zapobj, dirhash);
- *zc = t;
- return 0;
- }
- return (ENOMEM);
-}
-
-void udmu_zap_cursor_fini(zap_cursor_t *zc)
-{
- zap_cursor_fini(zc);
- kmem_free(zc, sizeof(*zc));
-}
-
-/*
- * Get the object id from dmu_buf_t
- */
-int udmu_object_is_zap(dmu_buf_t *db)
-{
- dmu_buf_impl_t *dbi = (dmu_buf_impl_t *) db;
- dnode_t *dn;
- int rc;
-
- DB_DNODE_ENTER(dbi);
-
- dn = DB_DNODE(dbi);
- rc = (dn->dn_type == DMU_OT_DIRECTORY_CONTENTS ||
- dn->dn_type == DMU_OT_USERGROUP_USED);
-
- DB_DNODE_EXIT(dbi);
-
- return rc;
-}
-
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2012, Intel Corporation.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/osd-zfs/udmu.h
- *
- * Author: Alex Tomas <alex@clusterfs.com>
- * Author: Atul Vidwansa <atul.vidwansa@sun.com>
- * Author: Manoj Joseph <manoj.joseph@sun.com>
- */
-
-#ifndef _DMU_H
-#define _DMU_H
-
-#include <sys/zap.h>
-#include <sys/mode.h>
-#include <sys/sa.h>
-
-#include <lustre/lustre_user.h>
-
-typedef struct udmu_objset {
- struct objset *os;
- uint64_t root; /* id of root znode */
- spinlock_t lock; /* protects objects below */
- uint64_t objects; /* in-core counter of objects */
- /* SA attr mapping->id,
- * name is the same as in ZFS to use defines SA_ZPL_...*/
- sa_attr_type_t *z_attr_table;
-} udmu_objset_t;
-
-#ifndef _SYS_TXG_H
-#define TXG_WAIT 1ULL
-#define TXG_NOWAIT 2ULL
-#endif
-
-#define ZFS_DIRENT_MAKE(type, obj) (((uint64_t)type << 60) | obj)
-
-/* Statfs space reservation for grant, fragmentation, and unlink space. */
-#define OSD_STATFS_RESERVED_BLKS (1ULL << (22 - SPA_MAXBLOCKSHIFT)) /* 4MB */
-#define OSD_STATFS_RESERVED_SHIFT (7) /* reserve 0.78% of all space */
-
-/* Statfs {minimum, safe estimate, and maximum} dnodes per block */
-#define OSD_DNODE_MIN_BLKSHIFT (SPA_MAXBLOCKSHIFT - DNODE_SHIFT) /* 17-9 =8 */
-#define OSD_DNODE_EST_BLKSHIFT (SPA_MAXBLOCKSHIFT - 12) /* 17-12=5 */
-#define OSD_DNODE_EST_COUNT 1024
-
-#define OSD_GRANT_FOR_LOCAL_OIDS (2ULL << 20) /* 2MB for last_rcvd, ... */
-
-void udmu_init(void);
-void udmu_fini(void);
-
-/* udmu object-set API */
-int udmu_objset_open(char *osname, udmu_objset_t *uos);
-void udmu_objset_close(udmu_objset_t *uos);
-int udmu_objset_statfs(udmu_objset_t *uos, struct obd_statfs *osfs);
-uint64_t udmu_objset_user_iused(udmu_objset_t *uos, uint64_t uidbytes);
-int udmu_objset_root(udmu_objset_t *uos, dmu_buf_t **dbp, void *tag);
-uint64_t udmu_get_txg(udmu_objset_t *uos, dmu_tx_t *tx);
-int udmu_blk_insert_cost(void);
-
-/* zap cursor apis */
-int udmu_zap_cursor_init(zap_cursor_t **zc, udmu_objset_t *uos,
- uint64_t zapobj, uint64_t hash);
-
-void udmu_zap_cursor_fini(zap_cursor_t *zc);
-
-void udmu_zap_cursor_advance(zap_cursor_t *zc);
-
-uint64_t udmu_zap_cursor_serialize(zap_cursor_t *zc);
-
-int udmu_zap_cursor_move_to_key(zap_cursor_t *zc, const char *name);
-
-/* Commit callbacks */
-int udmu_object_is_zap(dmu_buf_t *);
-
-#endif /* _DMU_H */
}
# CMD: determine mds index where directory inode presents
-get_mds_dir () {
+get_mds_dir() {
local dir=$1
- local file=$dir/f0.get_mds_dir_tmpfile
+ local SEQ
- mkdir -p $dir
- rm -f $file
- sleep 1
- local iused=$(lfs df -i $dir | grep MDT | awk '{print $3}')
- local -a oldused=($iused)
-
- openfile -f O_CREAT:O_LOV_DELAY_CREATE -m 0644 $file > /dev/null
- sleep 1
- iused=$(lfs df -i $dir | grep MDT | awk '{print $3}')
- local -a newused=($iused)
-
- local num=0
- for ((i=0; i<${#newused[@]}; i++)); do
- if [ ${oldused[$i]} -lt ${newused[$i]} ]; then
- echo $(( i + 1 ))
- rm -f $file
- return 0
- fi
- done
- error "mdt-s : inodes count OLD ${oldused[@]} NEW ${newused[@]}"
+ SEQ=$(lfs path2fid $dir | tr '[:]' ' '|cut -f2 -d ' ')
+ if [ "$SEQ" == "" ]; then
+ error "can't get sequence for $dir"
+ return 1
+ fi
+ export SEQ
+
+ do_facet mds1 "cat /proc/fs/lustre/fld/srv-*-MDT0000/fldb" | \
+ tr '[)]:-' ' ' | \
+ while read SS EE IDX TYP; do \
+ if let "SEQ >= SS && SEQ < EE"; then \
+ echo $IDX; \
+ fi; \
+ done
}
mdsrate_cleanup () {
/*
* Create the ZFS filesystem with any required mkfs options:
* - canmount=off is set to prevent zfs automounting
- * - version=4 is set because SA are not yet handled by the osd
+ * - xattr=sa is set to use system attribute based xattrs
*/
memset(mkfs_cmd, 0, PATH_MAX);
snprintf(mkfs_cmd, PATH_MAX,
"zfs create -o canmount=off -o xattr=sa%s %s",
- zfs_mkfs_opts(mop, mkfs_tmp, PATH_MAX),
- ds);
+ zfs_mkfs_opts(mop, mkfs_tmp, PATH_MAX), ds);
vprint("mkfs_cmd = %s\n", mkfs_cmd);
ret = run_command(mkfs_cmd, PATH_MAX);