while (!list_empty(list)) {
obj = list_entry(list->next,
struct osd_object, oo_unlinked_linkage);
- LASSERT(obj->oo_db != NULL);
- oid = obj->oo_db->db_object;
+ LASSERT(obj->oo_dn != NULL);
+ oid = obj->oo_dn->dn_object;
list_del_init(&obj->oo_unlinked_linkage);
if (free)
{
char *mntdev = lustre_cfg_string(cfg, 1);
char *svname = lustre_cfg_string(cfg, 4);
- dmu_buf_t *rootdb;
+ dnode_t *rootdn;
const char *opts;
int rc;
ENTRY;
if (rc)
GOTO(err, rc);
- rc = __osd_obj2dbuf(env, o->od_os, o->od_rootid, &rootdb);
+ rc = __osd_obj2dnode(env, o->od_os, o->od_rootid, &rootdn);
if (rc)
GOTO(err, rc);
- o->od_root = rootdb->db_object;
- sa_buf_rele(rootdb, osd_obj_tag);
+ o->od_root = rootdn->dn_object;
+ osd_dnode_rele(rootdn);
/* 1. initialize oi before any file create or file open */
rc = osd_oi_init(env, o);
#include "osd_internal.h"
#include <sys/dnode.h>
-#include <sys/dbuf.h>
#include <sys/spa.h>
#include <sys/stat.h>
#include <sys/zap.h>
#include <sys/sa_impl.h>
#include <sys/txg.h>
-static inline int osd_object_is_zap(dmu_buf_t *db)
+static inline int osd_object_is_zap(dnode_t *dn)
{
- dmu_buf_impl_t *dbi = (dmu_buf_impl_t *) db;
- dnode_t *dn;
- int rc;
-
- DB_DNODE_ENTER(dbi);
- dn = DB_DNODE(dbi);
- rc = (dn->dn_type == DMU_OT_DIRECTORY_CONTENTS ||
+ return (dn->dn_type == DMU_OT_DIRECTORY_CONTENTS ||
dn->dn_type == DMU_OT_USERGROUP_USED);
- DB_DNODE_EXIT(dbi);
-
- return rc;
}
/* We don't actually have direct access to the zap_hashbits() function
{
struct osd_device *d = osd_obj2dev(o);
osd_zap_cursor_init_serialized(zc, d->od_os,
- o->oo_db->db_object, dirhash);
+ o->oo_dn->dn_object, dirhash);
}
static inline int osd_obj_cursor_init(zap_cursor_t **zc, struct osd_object *o,
uint64_t dirhash)
{
struct osd_device *d = osd_obj2dev(o);
- return osd_zap_cursor_init(zc, d->od_os, o->oo_db->db_object, dirhash);
+ return osd_zap_cursor_init(zc, d->od_os, o->oo_dn->dn_object, dirhash);
}
static struct dt_it *osd_index_it_init(const struct lu_env *env,
RETURN(ERR_PTR(-ENOENT));
LASSERT(lu_object_exists(lo));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
LASSERT(info);
OBD_SLAB_ALLOC_PTR_GFP(it, osd_zapit_cachep, GFP_NOFS);
ENTRY;
/* first of all, get parent dnode from own attributes */
- LASSERT(osd_dt_obj(o)->oo_db);
- rc = -sa_handle_get(osd->od_os, osd_dt_obj(o)->oo_db->db_object,
+ LASSERT(osd_dt_obj(o)->oo_dn);
+ rc = -sa_handle_get(osd->od_os, osd_dt_obj(o)->oo_dn->dn_object,
NULL, SA_HDL_PRIVATE, &sa_hdl);
if (rc != 0)
RETURN(rc);
}
memset(&oti->oti_zde.lzd_fid, 0, sizeof(struct lu_fid));
- rc = -zap_lookup(osd->od_os, obj->oo_db->db_object,
+ rc = -zap_lookup(osd->od_os, obj->oo_dn->dn_object,
(char *)key, 8, sizeof(oti->oti_zde) / 8,
(void *)&oti->oti_zde);
if (rc != 0)
oh = container_of0(th, struct osd_thandle, ot_super);
/* This is for inserting dot/dotdot for new created dir. */
- if (obj->oo_db == NULL)
+ if (obj->oo_dn == NULL)
object = DMU_NEW_OBJECT;
else
- object = obj->oo_db->db_object;
+ object = obj->oo_dn->dn_object;
/* do not specify the key as then DMU is trying to look it up
* which is very expensive. usually the layers above lookup
int rc;
ENTRY;
- LASSERT(parent->oo_db);
+ LASSERT(parent->oo_dn);
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(parent));
oti->oti_zde.lzd_fid = *fid;
/* Insert (key,oid) into ZAP */
- rc = -zap_add(osd->od_os, parent->oo_db->db_object,
+ rc = -zap_add(osd->od_os, parent->oo_dn->dn_object,
(char *)key, 8, sizeof(oti->oti_zde) / 8,
(void *)&oti->oti_zde, oh->ot_tx);
if (unlikely(rc == -EEXIST &&
name[0] == '.' && name[1] == '.' && name[2] == 0))
/* Update (key,oid) in ZAP */
- rc = -zap_update(osd->od_os, parent->oo_db->db_object,
+ rc = -zap_update(osd->od_os, parent->oo_dn->dn_object,
(char *)key, 8, sizeof(oti->oti_zde) / 8,
(void *)&oti->oti_zde, oh->ot_tx);
oh = container_of0(th, struct osd_thandle, ot_super);
if (dt_object_exists(dt)) {
- LASSERT(obj->oo_db);
- dnode = obj->oo_db->db_object;
+ LASSERT(obj->oo_dn);
+ dnode = obj->oo_dn->dn_object;
} else {
dnode = DMU_NEW_OBJECT;
}
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
struct osd_thandle *oh;
- dmu_buf_t *zap_db = obj->oo_db;
+ dnode_t *zap_dn = obj->oo_dn;
char *name = (char *)key;
int rc;
ENTRY;
- LASSERT(zap_db);
+ LASSERT(zap_dn);
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
}
/* Remove key from the ZAP */
- rc = -zap_remove(osd->od_os, zap_db->db_object,
+ rc = -zap_remove(osd->od_os, zap_dn->dn_object,
(char *) key, oh->ot_tx);
if (unlikely(rc && rc != -ENOENT))
rc = osd_prepare_key_uint64(obj, k, key);
- rc = -zap_lookup_uint64(osd->od_os, obj->oo_db->db_object,
+ rc = -zap_lookup_uint64(osd->od_os, obj->oo_dn->dn_object,
k, rc, obj->oo_recusize, obj->oo_recsize,
(void *)rec);
RETURN(rc == 0 ? 1 : rc);
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
- dmu_tx_hold_bonus(oh->ot_tx, obj->oo_db->db_object);
+ dmu_tx_hold_bonus(oh->ot_tx, obj->oo_dn->dn_object);
/* do not specify the key as then DMU is trying to look it up
* which is very expensive. usually the layers above lookup
* before insertion */
- dmu_tx_hold_zap(oh->ot_tx, obj->oo_db->db_object, TRUE, NULL);
+ dmu_tx_hold_zap(oh->ot_tx, obj->oo_dn->dn_object, TRUE, NULL);
RETURN(0);
}
int rc;
ENTRY;
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(obj));
LASSERT(th != NULL);
rc = osd_prepare_key_uint64(obj, k, key);
/* Insert (key,oid) into ZAP */
- rc = -zap_add_uint64(osd->od_os, obj->oo_db->db_object,
+ rc = -zap_add_uint64(osd->od_os, obj->oo_dn->dn_object,
k, rc, obj->oo_recusize, obj->oo_recsize,
(void *)rec, oh->ot_tx);
RETURN(rc);
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(obj));
LASSERT(th != NULL);
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
oh = container_of0(th, struct osd_thandle, ot_super);
/* do not specify the key as then DMU is trying to look it up
* which is very expensive. usually the layers above lookup
* before deletion */
- dmu_tx_hold_zap(oh->ot_tx, obj->oo_db->db_object, FALSE, NULL);
+ dmu_tx_hold_zap(oh->ot_tx, obj->oo_dn->dn_object, FALSE, NULL);
RETURN(0);
}
int rc;
ENTRY;
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
rc = osd_prepare_key_uint64(obj, k, key);
/* Remove binary key from the ZAP */
- rc = -zap_remove_uint64(osd->od_os, obj->oo_db->db_object,
+ rc = -zap_remove_uint64(osd->od_os, obj->oo_dn->dn_object,
k, rc, oh->ot_tx);
RETURN(rc);
}
*((__u64 *)key));
zap_cursor_fini(it->ozi_zc);
- zap_cursor_init(it->ozi_zc, osd->od_os, obj->oo_db->db_object);
+ zap_cursor_init(it->ozi_zc, osd->od_os, obj->oo_dn->dn_object);
it->ozi_reset = 1;
RETURN(+1);
rc = osd_prepare_key_uint64(obj, k, (const struct dt_key *)za->za_name);
- rc = -zap_lookup_uint64(osd->od_os, obj->oo_db->db_object,
+ rc = -zap_lookup_uint64(osd->od_os, obj->oo_dn->dn_object,
k, rc, obj->oo_recusize, obj->oo_recsize,
(void *)rec);
RETURN(rc);
/* reset the cursor */
zap_cursor_fini(it->ozi_zc);
zap_cursor_init_serialized(it->ozi_zc, osd->od_os,
- obj->oo_db->db_object, hash);
+ obj->oo_dn->dn_object, hash);
it->ozi_reset = 0;
rc = -zap_cursor_retrieve(it->ozi_zc, za);
GOTO(out, rc = 0);
}
- LASSERT(!dt_object_exists(dt) || obj->oo_db != NULL);
+ LASSERT(!dt_object_exists(dt) || obj->oo_dn != NULL);
if (likely(feat == &dt_directory_features)) {
- if (!dt_object_exists(dt) || osd_object_is_zap(obj->oo_db))
+ if (!dt_object_exists(dt) || osd_object_is_zap(obj->oo_dn))
dt->do_index_ops = &osd_dir_ops;
else
GOTO(out, rc = -ENOTDIR);
struct osd_oi {
char oi_name[OSD_OI_NAME_SIZE]; /* unused */
uint64_t oi_zapid;
- dmu_buf_t *oi_db;
+ dnode_t *oi_dn;
};
struct osd_seq {
* Not modified concurrently (either setup early during object
* creation, or assigned by osd_object_create() under write lock).
*/
- dmu_buf_t *oo_db;
+ dnode_t *oo_dn;
sa_handle_t *oo_sa_hdl;
nvlist_t *oo_sa_xattr;
struct list_head oo_sa_linkage;
/* osd_object.c */
extern char *osd_obj_tag;
void osd_object_sa_dirty_rele(struct osd_thandle *oh);
-int __osd_obj2dbuf(const struct lu_env *env, objset_t *os,
- uint64_t oid, dmu_buf_t **dbp);
+int __osd_obj2dnode(const struct lu_env *env, objset_t *os,
+ uint64_t oid, dnode_t **dnp);
struct lu_object *osd_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *d);
int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type,
void *buf, uint32_t buflen, struct osd_thandle *oh);
int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
- dmu_buf_t **zap_dbp, dmu_tx_t *tx, struct lu_attr *la,
+ dnode_t **zap_dnp, dmu_tx_t *tx, struct lu_attr *la,
zap_flags_t flags);
int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
- dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la);
+ dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la);
int __osd_attr_init(const struct lu_env *env, struct osd_device *osd,
sa_handle_t *sa_hdl, dmu_tx_t *tx,
struct lu_attr *la, uint64_t parent);
if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
return -ENOENT;
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
if (osd_obj2dev(obj)->od_xattr_in_sa) {
rc = __osd_sa_xattr_set(env, obj, buf, name, fl, oh);
if (rc == -EFBIG)
#define DN_MAX_BONUSLEN DN_OLD_MAX_BONUSLEN
#endif
+/* Release a dnode reference obtained via __osd_obj2dnode() or
+ * osd_find_new_dnode(): drop the DB_DNODE hold taken by DB_DNODE_ENTER()
+ * on the bonus dbuf, then release the bonus buffer itself. */
+static inline void osd_dnode_rele(dnode_t *dn)
+{
+ dmu_buf_impl_t *db;
+ LASSERT(dn);
+ LASSERT(dn->dn_bonus);
+ db = dn->dn_bonus;
+
+ DB_DNODE_EXIT(db);
+ dmu_buf_rele(&db->db, osd_obj_tag);
+}
#endif /* _OSD_INTERNAL_H */
#include <sys/sa_impl.h>
#include <sys/txg.h>
-static char *osd_zerocopy_tag = "zerocopy";
+static char *osd_0copy_tag = "zerocopy";
static void record_start_io(struct osd_device *osd, int rw, int discont_pages)
unsigned long start;
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
start = cfs_time_current();
record_start_io(osd, READ, 0);
- rc = -dmu_read(osd->od_os, obj->oo_db->db_object, *pos, size,
+ rc = -dmu_read(osd->od_os, obj->oo_dn->dn_object, *pos, size,
buf->lb_buf, DMU_READ_PREFETCH);
record_end_io(osd, READ, cfs_time_current() - start, size,
/* size change (in dnode) will be declared by dmu_tx_hold_write() */
if (dt_object_exists(dt))
- oid = obj->oo_db->db_object;
+ oid = obj->oo_dn->dn_object;
else
oid = DMU_NEW_OBJECT;
ENTRY;
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
- dmu_write(osd->od_os, obj->oo_db->db_object, offset,
+ dmu_write(osd->od_os, obj->oo_dn->dn_object, offset,
(uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx);
write_lock(&obj->oo_attr_lock);
if (obj->oo_attr.la_size < offset + buf->lb_len) {
int i;
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
for (i = 0; i < npages; i++) {
if (lnb[i].lnb_page == NULL)
ptr = (unsigned long)lnb[i].lnb_data;
if (ptr & 1UL) {
ptr &= ~1UL;
- dmu_buf_rele((void *)ptr, osd_zerocopy_tag);
+ dmu_buf_rele((void *)ptr, osd_0copy_tag);
atomic_dec(&osd->od_zerocopy_pin);
} else if (lnb[i].lnb_data != NULL) {
dmu_return_arcbuf(lnb[i].lnb_data);
* can get own replacement for dmu_buf_hold_array_by_bonus().
*/
while (len > 0) {
- rc = -dmu_buf_hold_array_by_bonus(obj->oo_db, off, len, TRUE,
- osd_zerocopy_tag, &numbufs,
- &dbp);
+ rc = -dmu_buf_hold_array_by_bonus(&obj->oo_dn->dn_bonus->db,
+ off, len, TRUE, osd_0copy_tag,
+ &numbufs, &dbp);
if (unlikely(rc))
GOTO(err, rc);
dbp[i] = NULL;
}
- dmu_buf_rele_array(dbp, numbufs, osd_zerocopy_tag);
+ dmu_buf_rele_array(dbp, numbufs, osd_0copy_tag);
}
record_end_io(osd, READ, cfs_time_current() - start,
struct osd_device *osd = osd_obj2dev(obj);
int plen, off_in_block, sz_in_block;
int rc, i = 0, npages = 0;
- arc_buf_t *abuf;
- uint32_t bs;
- uint64_t dummy;
+ dnode_t *dn = obj->oo_dn;
+ arc_buf_t *abuf;
+ uint32_t bs = dn->dn_datablksz;
ENTRY;
- dmu_object_size_from_db(obj->oo_db, &bs, &dummy);
-
/*
* currently only full blocks are subject to zerocopy approach:
* so that we're sure nobody is trying to update the same block
if (sz_in_block == bs) {
/* full block, try to use zerocopy */
- abuf = dmu_request_arcbuf(obj->oo_db, bs);
+ abuf = dmu_request_arcbuf(&dn->dn_bonus->db, bs);
if (unlikely(abuf == NULL))
GOTO(out_err, rc = -ENOMEM);
int rc;
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
if (rw == 0)
rc = osd_bufs_get_read(env, obj, offset, len, lnb);
struct osd_object *obj = osd_dt_obj(dt);
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
return 0;
}
-static inline uint32_t osd_get_blocksz(struct osd_object *obj)
-{
- uint32_t blksz;
- u_longlong_t unused;
-
- LASSERT(obj->oo_db);
-
- dmu_object_size_from_db(obj->oo_db, &blksz, &unused);
- return blksz;
-}
-
static inline uint64_t osd_roundup2blocksz(uint64_t size,
uint64_t offset,
uint32_t blksz)
struct osd_thandle *oh;
uint64_t offset = 0;
uint32_t size = 0;
- uint32_t blksz = osd_get_blocksz(obj);
+ uint32_t blksz = obj->oo_dn->dn_datablksz;
int i, rc, flags = 0;
bool ignore_quota = false, synced = false;
long long space = 0;
ENTRY;
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
LASSERT(lnb);
LASSERT(npages > 0);
continue;
}
- dmu_tx_hold_write(oh->ot_tx, obj->oo_db->db_object,
+ dmu_tx_hold_write(oh->ot_tx, obj->oo_dn->dn_object,
offset, size);
/* Estimating space to be consumed by a write is rather
* complicated with ZFS. As a consequence, we don't account for
}
if (size) {
- dmu_tx_hold_write(oh->ot_tx, obj->oo_db->db_object,
+ dmu_tx_hold_write(oh->ot_tx, obj->oo_dn->dn_object,
offset, size);
space += osd_roundup2blocksz(size, offset, blksz);
}
uint64_t start, uint64_t end)
{
struct osd_device *osd = osd_obj2dev(obj);
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)obj->oo_db;
- dnode_t *dn;
+ dnode_t *dn = obj->oo_dn;
uint32_t blksz;
int rc = 0;
ENTRY;
- DB_DNODE_ENTER(db);
- dn = DB_DNODE(db);
-
if (dn->dn_maxblkid > 0) /* can't change block size */
GOTO(out, rc);
out_unlock:
up_write(&obj->oo_guard);
out:
- DB_DNODE_EXIT(db);
return rc;
}
ENTRY;
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
}
if (lnb[i].lnb_page->mapping == (void *)obj) {
- dmu_write(osd->od_os, obj->oo_db->db_object,
+ dmu_write(osd->od_os, obj->oo_dn->dn_object,
lnb[i].lnb_file_offset, lnb[i].lnb_len,
kmap(lnb[i].lnb_page), oh->ot_tx);
kunmap(lnb[i].lnb_page);
* notice that dmu_assign_arcbuf() is smart
* enough to recognize changed blocksize
* in this case it fallbacks to dmu_write() */
- dmu_assign_arcbuf(obj->oo_db, lnb[i].lnb_file_offset,
+ dmu_assign_arcbuf(&obj->oo_dn->dn_bonus->db,
+ lnb[i].lnb_file_offset,
lnb[i].lnb_data, oh->ot_tx);
/* drop the reference, otherwise osd_put_bufs()
* will be releasing it - bad! */
loff_t eof;
LASSERT(dt_object_exists(dt));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
read_lock(&obj->oo_attr_lock);
eof = obj->oo_attr.la_size;
* dmu_tx_hold_sa() and if off < size, dmu_tx_hold_free()
* called and then assigned to a transaction group.
*/
-static int __osd_object_punch(objset_t *os, dmu_buf_t *db, dmu_tx_t *tx,
+static int __osd_object_punch(objset_t *os, dnode_t *dn, dmu_tx_t *tx,
uint64_t size, uint64_t off, uint64_t len)
{
int rc = 0;
if (len == DMU_OBJECT_END && size == off)
return 0;
+ /* XXX: dnode_free_range() can be used to save on dnode lookup */
if (off < size)
- rc = -dmu_free_range(os, db->db_object, off, len, tx);
+ rc = -dmu_free_range(os, dn->dn_object, off, len, tx);
return rc;
}
len = end - start;
write_unlock(&obj->oo_attr_lock);
- rc = __osd_object_punch(osd->od_os, obj->oo_db, oh->ot_tx,
+ rc = __osd_object_punch(osd->od_os, obj->oo_dn, oh->ot_tx,
obj->oo_attr.la_size, start, len);
/* set new size */
if (len == DMU_OBJECT_END) {
/* declare we'll free some blocks ... */
if (start < obj->oo_attr.la_size) {
read_unlock(&obj->oo_attr_lock);
- dmu_tx_hold_free(oh->ot_tx, obj->oo_db->db_object, start, len);
+ dmu_tx_hold_free(oh->ot_tx, obj->oo_dn->dn_object, start, len);
} else {
read_unlock(&obj->oo_attr_lock);
}
int rc;
LASSERT(obj->oo_sa_hdl == NULL);
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
- rc = -sa_handle_get(o->od_os, obj->oo_db->db_object, obj,
+ rc = -sa_handle_get(o->od_os, obj->oo_dn->dn_object, obj,
SA_HDL_PRIVATE, &obj->oo_sa_hdl);
if (rc)
return rc;
int rc;
ENTRY;
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
- rc = -sa_handle_get(o->od_os, obj->oo_db->db_object, NULL,
+ rc = -sa_handle_get(o->od_os, obj->oo_dn->dn_object, NULL,
SA_HDL_PRIVATE, &sa_hdl);
if (rc)
RETURN(rc);
RETURN(rc);
}
-int __osd_obj2dbuf(const struct lu_env *env, objset_t *os,
- uint64_t oid, dmu_buf_t **dbp)
+int __osd_obj2dnode(const struct lu_env *env, objset_t *os,
+ uint64_t oid, dnode_t **dnp)
{
dmu_object_info_t *doi = &osd_oti_get(env)->oti_doi;
+ dmu_buf_t *db;
+ dmu_buf_impl_t *dbi;
int rc;
- rc = -sa_buf_hold(os, oid, osd_obj_tag, dbp);
+ rc = -dmu_bonus_hold(os, oid, osd_obj_tag, &db);
if (rc)
return rc;
- dmu_object_info_from_db(*dbp, doi);
+ dbi = (dmu_buf_impl_t *)db;
+ DB_DNODE_ENTER(dbi);
+ *dnp = DB_DNODE(dbi);
+
+ LASSERT(*dnp != NULL);
+ dmu_object_info_from_dnode(*dnp, doi);
if (unlikely (oid != DMU_USERUSED_OBJECT &&
oid != DMU_GROUPUSED_OBJECT && doi->doi_bonus_type != DMU_OT_SA)) {
- sa_buf_rele(*dbp, osd_obj_tag);
- *dbp = NULL;
+ osd_dnode_rele(*dnp);
+ *dnp = NULL;
return -EINVAL;
}
- LASSERT(*dbp);
- LASSERT((*dbp)->db_object == oid);
- LASSERT((*dbp)->db_offset == -1);
- LASSERT((*dbp)->db_data != NULL);
-
return 0;
}
int rc = 0;
ENTRY;
- if (obj->oo_db == NULL)
+ if (obj->oo_dn == NULL)
RETURN(0);
/* object exist */
rc = osd_fid_lookup(env, osd, lu_object_fid(l), &oid);
if (rc == 0) {
- LASSERT(obj->oo_db == NULL);
- rc = __osd_obj2dbuf(env, osd->od_os, oid, &obj->oo_db);
+ LASSERT(obj->oo_dn == NULL);
+ rc = __osd_obj2dnode(env, osd->od_os, oid, &obj->oo_dn);
/* EEXIST will be returned if object is being deleted in ZFS */
if (rc == -EEXIST) {
rc = 0;
osd->od_svname, PFID(lu_object_fid(l)), oid, rc);
GOTO(out, rc);
}
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
rc = osd_object_init0(env, obj);
if (rc != 0)
GOTO(out, rc);
osd_object_set_destroy_type(obj);
if (obj->oo_destroy == OSD_DESTROY_SYNC)
- dmu_tx_hold_free(oh->ot_tx, obj->oo_db->db_object,
+ dmu_tx_hold_free(oh->ot_tx, obj->oo_dn->dn_object,
0, DMU_OBJECT_END);
else
dmu_tx_hold_zap(oh->ot_tx, osd->od_unlinkedid, TRUE, NULL);
if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
GOTO(out, rc = -ENOENT);
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
LASSERT(oh != NULL);
" %d: rc = %d\n", osd->od_svname, PFID(fid),
obj->oo_attr.la_gid, rc);
- oid = obj->oo_db->db_object;
+ oid = obj->oo_dn->dn_object;
if (unlikely(obj->oo_destroy == OSD_DESTROY_NONE)) {
/* this may happen if the destroy wasn't declared
* e.g. when the object is created and then destroyed
{
struct osd_object *obj = osd_obj(l);
- if (obj->oo_db != NULL) {
+ if (obj->oo_dn != NULL) {
osd_object_sa_fini(obj);
if (obj->oo_sa_xattr) {
nvlist_free(obj->oo_sa_xattr);
obj->oo_sa_xattr = NULL;
}
- sa_buf_rele(obj->oo_db, osd_obj_tag);
+ osd_dnode_rele(obj->oo_dn);
list_del(&obj->oo_sa_linkage);
- obj->oo_db = NULL;
+ obj->oo_dn = NULL;
}
}
GOTO(out, rc = -ENOENT);
LASSERT(osd_invariant(obj));
- LASSERT(obj->oo_db);
+ LASSERT(obj->oo_dn);
read_lock(&obj->oo_attr_lock);
*attr = obj->oo_attr;
txh = list_next(&oh->ot_tx->tx_holds, txh)) {
if (txh->txh_dnode == NULL)
continue;
- if (txh->txh_dnode->dn_object != obj->oo_db->db_object)
+ if (txh->txh_dnode->dn_object != obj->oo_dn->dn_object)
continue;
/* this object is part of the transaction already
* we don't need to declare bonus again */
break;
}
if (!found)
- dmu_tx_hold_bonus(oh->ot_tx, obj->oo_db->db_object);
+ dmu_tx_hold_bonus(oh->ot_tx, obj->oo_dn->dn_object);
if (oh->ot_tx->tx_err != 0)
GOTO(out, rc = -oh->ot_tx->tx_err);
return rc;
}
+/* Find the dnode_t for a just-allocated object by scanning the tx holds,
+ * avoiding a dnode#->dnode_t lookup; takes a bonus-buffer hold on the
+ * result (released later via osd_dnode_rele()). Falls back to
+ * __osd_obj2dnode() if the dnode is not among the tx holds. */
+static int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
+ uint64_t oid, dnode_t **dnp)
+{
+ dmu_tx_hold_t *txh;
+ int rc = 0;
+
+ /* take dnode_t from tx to save on dnode#->dnode_t lookup */
+ for (txh = list_tail(&tx->tx_holds); txh;
+ txh = list_prev(&tx->tx_holds, txh)) {
+ dnode_t *dn = txh->txh_dnode;
+ dmu_buf_impl_t *db;
+
+ if (dn == NULL)
+ continue;
+ if (dn->dn_object != oid)
+ continue;
+ db = dn->dn_bonus;
+ if (db == NULL) {
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ if (dn->dn_bonus == NULL)
+ dbuf_create_bonus(dn);
+ rw_exit(&dn->dn_struct_rwlock);
+ }
+ db = dn->dn_bonus;
+ LASSERT(db);
+ LASSERT(dn->dn_handle);
+ DB_DNODE_ENTER(db);
+ if (refcount_add(&db->db_holds, osd_obj_tag) == 1) {
+ refcount_add(&dn->dn_holds, osd_obj_tag);
+ atomic_inc_32(&dn->dn_dbufs_count);
+ }
+ *dnp = dn;
+ break;
+ }
+
+ if (unlikely(*dnp == NULL))
+ rc = __osd_obj2dnode(env, tx->tx_objset, oid, dnp);
+
+ return rc;
+}
+
/*
* The transaction passed to this routine must have
* dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
* to a transaction group.
*/
int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
- dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la)
+ dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la)
{
- uint64_t oid;
- int rc;
struct osd_device *osd = osd_obj2dev(obj);
const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
dmu_object_type_t type = DMU_OT_PLAIN_FILE_CONTENTS;
+ uint64_t oid;
/* Use DMU_OTN_UINT8_METADATA for local objects so their data blocks
* would get an additional ditto copy */
/* Create a new DMU object using the default dnode size. */
oid = osd_dmu_object_alloc(osd->od_os, type, 0, 0, tx);
- rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, dbp);
- LASSERTF(rc == 0, "sa_buf_hold %llu failed: %d\n", oid, rc);
LASSERT(la->la_valid & LA_MODE);
la->la_size = 0;
la->la_nlink = 1;
- return 0;
+ return osd_find_new_dnode(env, tx, oid, dnp);
}
/*
* then we might need to re-evaluate the use of this flag and instead do
* a conversion from the different internal ZAP hash formats being used. */
int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
- dmu_buf_t **zap_dbp, dmu_tx_t *tx,
- struct lu_attr *la, zap_flags_t flags)
+ dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la,
+ zap_flags_t flags)
{
uint64_t oid;
- int rc;
/* Assert that the transaction has been assigned to a
transaction group. */
LASSERT(tx->tx_txg != 0);
+ *dnp = NULL;
oid = osd_zap_create_flags(osd->od_os, 0, flags | ZAP_FLAG_HASH64,
DMU_OT_DIRECTORY_CONTENTS,
DN_MAX_INDBLKSHIFT, /* indirect blockshift */
0, tx);
- rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, zap_dbp);
- if (rc)
- return rc;
-
la->la_size = 2;
la->la_nlink = 1;
- return 0;
+ return osd_find_new_dnode(env, tx, oid, dnp);
}
-static dmu_buf_t *osd_mkidx(const struct lu_env *env, struct osd_object *obj,
- struct lu_attr *la, struct osd_thandle *oh)
+static dnode_t *osd_mkidx(const struct lu_env *env, struct osd_object *obj,
+ struct lu_attr *la, struct osd_thandle *oh)
{
- dmu_buf_t *db;
- int rc;
+ dnode_t *dn;
+ int rc;
/* Index file should be created as regular file in order not to confuse
* ZPL which could interpret them as directory.
* We set ZAP_FLAG_UINT64_KEY to let ZFS know than we are going to use
* binary keys */
LASSERT(S_ISREG(la->la_mode));
- rc = __osd_zap_create(env, osd_obj2dev(obj), &db, oh->ot_tx, la,
+ rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la,
ZAP_FLAG_UINT64_KEY);
if (rc)
return ERR_PTR(rc);
- return db;
+ return dn;
}
-static dmu_buf_t *osd_mkdir(const struct lu_env *env, struct osd_object *obj,
- struct lu_attr *la, struct osd_thandle *oh)
+static dnode_t *osd_mkdir(const struct lu_env *env, struct osd_object *obj,
+ struct lu_attr *la, struct osd_thandle *oh)
{
- dmu_buf_t *db;
- int rc;
+ dnode_t *dn;
+ int rc;
LASSERT(S_ISDIR(la->la_mode));
- rc = __osd_zap_create(env, osd_obj2dev(obj), &db, oh->ot_tx, la, 0);
+ rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la, 0);
if (rc)
return ERR_PTR(rc);
- return db;
+ return dn;
}
-static dmu_buf_t *osd_mkreg(const struct lu_env *env, struct osd_object *obj,
- struct lu_attr *la, struct osd_thandle *oh)
+static dnode_t *osd_mkreg(const struct lu_env *env, struct osd_object *obj,
+ struct lu_attr *la, struct osd_thandle *oh)
{
const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
- dmu_buf_t *db;
- int rc;
struct osd_device *osd = osd_obj2dev(obj);
+ dnode_t *dn;
+ int rc;
LASSERT(S_ISREG(la->la_mode));
- rc = __osd_object_create(env, obj, &db, oh->ot_tx, la);
+ rc = __osd_object_create(env, obj, &dn, oh->ot_tx, la);
if (rc)
return ERR_PTR(rc);
* it will break the assumption in tgt_thread_big_cache where
* the array size is PTLRPC_MAX_BRW_PAGES. It will also affect
* RDMA due to subpage transfer size */
- rc = -dmu_object_set_blocksize(osd->od_os, db->db_object,
+ rc = -dmu_object_set_blocksize(osd->od_os, dn->dn_object,
PAGE_SIZE, 0, oh->ot_tx);
if (unlikely(rc)) {
CERROR("%s: can't change blocksize: %d\n",
}
}
- return db;
+ return dn;
}
-static dmu_buf_t *osd_mksym(const struct lu_env *env, struct osd_object *obj,
- struct lu_attr *la, struct osd_thandle *oh)
+static dnode_t *osd_mksym(const struct lu_env *env, struct osd_object *obj,
+ struct lu_attr *la, struct osd_thandle *oh)
{
- dmu_buf_t *db;
- int rc;
+ dnode_t *dn;
+ int rc;
LASSERT(S_ISLNK(la->la_mode));
- rc = __osd_object_create(env, obj, &db, oh->ot_tx, la);
+ rc = __osd_object_create(env, obj, &dn, oh->ot_tx, la);
if (rc)
return ERR_PTR(rc);
- return db;
+ return dn;
}
-static dmu_buf_t *osd_mknod(const struct lu_env *env, struct osd_object *obj,
- struct lu_attr *la, struct osd_thandle *oh)
+static dnode_t *osd_mknod(const struct lu_env *env, struct osd_object *obj,
+ struct lu_attr *la, struct osd_thandle *oh)
{
- dmu_buf_t *db;
- int rc;
+ dnode_t *dn;
+ int rc;
if (S_ISCHR(la->la_mode) || S_ISBLK(la->la_mode))
la->la_valid |= LA_RDEV;
- rc = __osd_object_create(env, obj, &db, oh->ot_tx, la);
+ rc = __osd_object_create(env, obj, &dn, oh->ot_tx, la);
if (rc)
return ERR_PTR(rc);
- return db;
+ return dn;
}
-typedef dmu_buf_t *(*osd_obj_type_f)(const struct lu_env *env,
- struct osd_object *obj,
- struct lu_attr *la,
- struct osd_thandle *oh);
+typedef dnode_t *(*osd_obj_type_f)(const struct lu_env *env,
+ struct osd_object *obj,
+ struct lu_attr *la,
+ struct osd_thandle *oh);
static osd_obj_type_f osd_create_type_f(enum dt_format_type type)
{
struct osd_device *osd = osd_obj2dev(obj);
char *buf = info->oti_str;
struct osd_thandle *oh;
- dmu_buf_t *db = NULL;
+ dnode_t *dn = NULL;
uint64_t zapid, parent = 0;
int rc;
* XXX missing: Quote handling.
*/
- LASSERT(obj->oo_db == NULL);
+ LASSERT(obj->oo_dn == NULL);
/* to follow ZFS on-disk format we need
* to initialize parent dnode properly */
if (hint != NULL && hint->dah_parent != NULL &&
!dt_object_remote(hint->dah_parent))
- parent = osd_dt_obj(hint->dah_parent)->oo_db->db_object;
+ parent = osd_dt_obj(hint->dah_parent)->oo_dn->dn_object;
/* we may fix some attributes, better do not change the source */
obj->oo_attr = *attr;
obj->oo_attr.la_valid |= LA_SIZE | LA_NLINK | LA_TYPE;
- db = osd_create_type_f(dof->dof_type)(env, obj, &obj->oo_attr, oh);
- if (IS_ERR(db)) {
- rc = PTR_ERR(db);
- db = NULL;
+ dn = osd_create_type_f(dof->dof_type)(env, obj, &obj->oo_attr, oh);
+ if (IS_ERR(dn)) {
+ rc = PTR_ERR(dn);
+ dn = NULL;
GOTO(out, rc);
}
zde->zde_pad = 0;
- zde->zde_dnode = db->db_object;
+ zde->zde_dnode = dn->dn_object;
zde->zde_type = IFTODT(attr->la_mode & S_IFMT);
zapid = osd_get_name_n_idx(env, osd, fid, buf, sizeof(info->oti_str));
GOTO(out, rc);
/* Now add in all of the "SA" attributes */
- rc = -sa_handle_get(osd->od_os, db->db_object, NULL,
+ rc = -sa_handle_get(osd->od_os, dn->dn_object, NULL,
SA_HDL_PRIVATE, &obj->oo_sa_hdl);
if (rc)
GOTO(out, rc);
/* configure new osd object */
- obj->oo_db = db;
+ obj->oo_dn = dn;
parent = parent != 0 ? parent : zapid;
rc = __osd_attr_init(env, osd, obj->oo_sa_hdl, oh->ot_tx,
&obj->oo_attr, parent);
"(%d)\n", osd->od_svname, PFID(fid), attr->la_gid, rc);
out:
- if (unlikely(rc && db)) {
- dmu_object_free(osd->od_os, db->db_object, oh->ot_tx);
- sa_buf_rele(db, osd_obj_tag);
- obj->oo_db = NULL;
+ if (unlikely(rc && dn)) {
+ dmu_object_free(osd->od_os, dn->dn_object, oh->ot_tx);
+ osd_dnode_rele(dn);
+ obj->oo_dn = NULL;
}
up_write(&obj->oo_guard);
RETURN(rc);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 1, NULL);
dmu_tx_hold_bonus(tx, parent);
dmu_tx_hold_zap(tx, parent, TRUE, name);
- LASSERT(tx->tx_objset->os_sa);
dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
rc = -dmu_tx_assign(tx, TXG_WAIT);
oi = o->od_oi_table[key];
if (oi) {
- if (oi->oi_db)
- sa_buf_rele(oi->oi_db, osd_obj_tag);
+ if (oi->oi_dn)
+ osd_dnode_rele(oi->oi_dn);
OBD_FREE_PTR(oi);
o->od_oi_table[key] = NULL;
}
}
o->od_oi_table[key] = oi;
- __osd_obj2dbuf(env, o->od_os, oi->oi_zapid, &oi->oi_db);
+ __osd_obj2dnode(env, o->od_os, oi->oi_zapid, &oi->oi_dn);
return 0;
}
idc = osd_idc_find(env, osd, fid);
if (idc != NULL) {
- if (obj->oo_db == NULL)
+ if (obj->oo_dn == NULL)
return 0;
- idc->oic_dnode = obj->oo_db->db_object;
+ idc->oic_dnode = obj->oo_dn->dn_object;
return 0;
}
if (IS_ERR(idc))
return PTR_ERR(idc);
- if (obj->oo_db)
- idc->oic_dnode = obj->oo_db->db_object;
+ if (obj->oo_dn)
+ idc->oic_dnode = obj->oo_dn->dn_object;
return 0;
}
* DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
* not associated with any dmu_but_t (see dnode_special_open()).
* As a consequence, we cannot use udmu_zap_lookup() here since it
- * requires a valid oo_db. */
+ * requires a valid oo_dn. */
rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1,
&rec->bspace);
if (rc == -ENOENT)
/* as for inode accounting, it is not maintained by DMU, so we just
* use our own ZAP to track inode usage */
- rc = -zap_lookup(osd->od_os, obj->oo_db->db_object,
+ rc = -zap_lookup(osd->od_os, obj->oo_dn->dn_object,
buf, sizeof(uint64_t), 1, &rec->ispace);
if (rc == -ENOENT)
/* user/group has not created any file yet */
/* inode accounting is not maintained by DMU, so we use our own ZAP to
* track inode usage */
- rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_db->db_object,
+ rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_dn->dn_object,
za->za_name, sizeof(uint64_t), 1, &rec->ispace);
if (rc == -ENOENT)
/* user/group has not created any file yet */
struct osd_object *obj)
{
LASSERT(obj->oo_sa_xattr == NULL);
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
- return __osd_xattr_load(osd_obj2dev(obj), obj->oo_db->db_object,
+ return __osd_xattr_load(osd_obj2dev(obj), obj->oo_dn->dn_object,
&obj->oo_sa_xattr);
}
uint64_t xattr, struct lu_buf *buf,
const char *name, int *sizep)
{
- dmu_buf_t *xa_data_db;
- sa_handle_t *sa_hdl = NULL;
+ dnode_t *xa_data_dn;
+ sa_handle_t *sa_hdl = NULL;
uint64_t xa_data_obj, size;
int rc;
if (rc)
return rc;
- rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj, &xa_data_db);
+ rc = __osd_obj2dnode(env, osd->od_os, xa_data_obj, &xa_data_dn);
if (rc)
return rc;
goto out;
}
- rc = -dmu_read(osd->od_os, xa_data_db->db_object, 0,
+ rc = -dmu_read(osd->od_os, xa_data_dn->dn_object, 0,
size, buf->lb_buf, DMU_READ_PREFETCH);
out:
sa_handle_destroy(sa_hdl);
out_rele:
- dmu_buf_rele(xa_data_db, FTAG);
+ osd_dnode_rele(xa_data_dn);
return rc;
}
int rc, size = 0;
ENTRY;
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
LASSERT(osd_invariant(obj));
if (!osd_obj2dev(obj)->od_posix_acl &&
int vallen, const char *name,
struct osd_thandle *oh)
{
- dmu_buf_t *db = obj->oo_db;
- dmu_tx_t *tx = oh->ot_tx;
+ dmu_tx_t *tx = oh->ot_tx;
if (unlikely(obj->oo_destroyed))
return;
/* XXX: it should be possible to skip spill
* declaration if specific EA is part of
* bonus and doesn't grow */
- dmu_tx_hold_spill(tx, db->db_object);
+ dmu_tx_hold_spill(tx, obj->oo_dn->dn_object);
return;
}
struct osd_thandle *oh)
{
struct osd_device *osd = osd_obj2dev(obj);
- dmu_buf_t *xa_zap_db = NULL;
- dmu_buf_t *xa_data_db = NULL;
+ dnode_t *xa_zap_dn = NULL;
+ dnode_t *xa_data_dn = NULL;
uint64_t xa_data_obj;
sa_handle_t *sa_hdl = NULL;
dmu_tx_t *tx = oh->ot_tx;
la->la_valid = LA_MODE;
la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
- rc = __osd_zap_create(env, osd, &xa_zap_db, tx, la, 0);
+ rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0);
if (rc)
return rc;
- obj->oo_xattr = xa_zap_db->db_object;
+ obj->oo_xattr = xa_zap_dn->dn_object;
rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
&obj->oo_xattr, 8, oh);
if (rc)
* Entry already exists.
* We'll truncate the existing object.
*/
- rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj,
- &xa_data_db);
+ rc = __osd_obj2dnode(env, osd->od_os, xa_data_obj, &xa_data_dn);
if (rc)
goto out;
if (rc)
goto out_sa;
- rc = -dmu_free_range(osd->od_os, xa_data_db->db_object,
+ rc = -dmu_free_range(osd->od_os, xa_data_dn->dn_object,
0, DMU_OBJECT_END, tx);
if (rc)
goto out_sa;
la->la_valid = LA_MODE;
la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
- rc = __osd_object_create(env, obj, &xa_data_db, tx, la);
+ rc = __osd_object_create(env, obj, &xa_data_dn, tx, la);
if (rc)
goto out;
- xa_data_obj = xa_data_db->db_object;
+ xa_data_obj = xa_data_dn->dn_object;
rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
SA_HDL_PRIVATE, &sa_hdl);
out_sa:
sa_handle_destroy(sa_hdl);
out:
- if (xa_data_db != NULL)
- dmu_buf_rele(xa_data_db, FTAG);
- if (xa_zap_db != NULL)
- dmu_buf_rele(xa_zap_db, FTAG);
+ if (xa_data_dn != NULL)
+ osd_dnode_rele(xa_data_dn);
+ if (xa_zap_dn != NULL)
+ osd_dnode_rele(xa_zap_dn);
return rc;
}
oh = container_of0(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_tx != NULL);
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
down_read(&obj->oo_guard);
if (likely(dt_object_exists(&obj->oo_dt) && !obj->oo_destroyed))
ENTRY;
LASSERT(handle != NULL);
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
LASSERT(osd_invariant(obj));
LASSERT(dt_object_exists(dt));
oh = container_of0(handle, struct osd_thandle, ot_super);
int rc, counted;
ENTRY;
- LASSERT(obj->oo_db != NULL);
+ LASSERT(obj->oo_dn != NULL);
LASSERT(osd_invariant(obj));
LASSERT(dt_object_exists(dt));