From: Alex Zhuravlev Date: Wed, 30 Nov 2016 20:07:54 +0000 (+0300) Subject: LU-8882 osd: use bydnode methods to access DMU X-Git-Tag: 2.9.57~8 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=aa096c486589129a7bc20f9b8c31803e56c4b82f LU-8882 osd: use bydnode methods to access DMU newer ZFS allows to access DMU by dnode which save expensive dnode# to dnode_t mapping. Change-Id: I469c2a72d18f170ebb96dd33c23bb6d8f037188a Signed-off-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/24035 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Nathaniel Clark Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin --- diff --git a/config/lustre-build-zfs.m4 b/config/lustre-build-zfs.m4 index ded7085..49170d2 100644 --- a/config/lustre-build-zfs.m4 +++ b/config/lustre-build-zfs.m4 @@ -515,6 +515,103 @@ your distribution. AC_DEFINE(HAVE_DMU_USEROBJ_ACCOUNTING, 1, [Have native dnode accounting in ZFS]) ]) + dnl # ZFS 0.7.x adds new method zap_lookup_by_dnode + dnl # + LB_CHECK_COMPILE([if ZFS has 'zap_lookup_by_dnode'], + zap_lookup_by_dnode, [ + #include + #include + ],[ + dnode_t *dn = NULL; + zap_lookup_by_dnode(dn, NULL, 1, 1, NULL); + ],[ + AC_DEFINE(HAVE_ZAP_LOOKUP_BY_DNODE, 1, + [Have zap_lookup_by_dnode() in ZFS]) + ]) + dnl # + dnl # ZFS 0.7.x adds new method zap_add_by_dnode + dnl # + LB_CHECK_COMPILE([if ZFS has 'zap_add_by_dnode'], + zap_add_by_dnode, [ + #include + #include + ],[ + dnode_t *dn = NULL; + zap_add_by_dnode(dn, NULL, 1, 1, NULL, NULL); + ],[ + AC_DEFINE(HAVE_ZAP_ADD_BY_DNODE, 1, + [Have zap_add_by_dnode() in ZFS]) + ]) + dnl # + dnl # ZFS 0.7.x adds new method zap_remove_by_dnode + dnl # + LB_CHECK_COMPILE([if ZFS has 'zap_remove_by_dnode'], + zap_remove_by_dnode, [ + #include + #include + ],[ + dnode_t *dn = NULL; + zap_remove_by_dnode(dn, NULL, NULL); + ],[ + AC_DEFINE(HAVE_ZAP_REMOVE_ADD_BY_DNODE, 1, + [Have zap_remove_by_dnode() in ZFS]) + ]) + dnl # + dnl # ZFS 0.7.x adds new method dmu_tx_hold_zap_by_dnode 
+ dnl # + LB_CHECK_COMPILE([if ZFS has 'dmu_tx_hold_zap_by_dnode'], + dmu_tx_hold_zap_by_dnode, [ + #include + #include + ],[ + dnode_t *dn = NULL; + dmu_tx_hold_zap_by_dnode(NULL, dn, TRUE, NULL); + ],[ + AC_DEFINE(HAVE_DMU_TX_HOLD_ZAP_BY_DNODE, 1, + [Have dmu_tx_hold_zap_by_dnode() in ZFS]) + ]) + dnl # + dnl # ZFS 0.7.x adds new method dmu_tx_hold_write_by_dnode + dnl # + LB_CHECK_COMPILE([if ZFS has 'dmu_tx_hold_write_by_dnode'], + dmu_tx_hold_write_by_dnode, [ + #include + #include + ],[ + dnode_t *dn = NULL; + dmu_tx_hold_write_by_dnode(NULL, dn, 0, 0); + ],[ + AC_DEFINE(HAVE_DMU_TX_HOLD_WRITE_BY_DNODE, 1, + [Have dmu_tx_hold_write_by_dnode() in ZFS]) + ]) + dnl # + dnl # ZFS 0.7.x adds new method dmu_write_by_dnode + dnl # + LB_CHECK_COMPILE([if ZFS has 'dmu_write_by_dnode'], + dmu_write_by_dnode, [ + #include + #include + ],[ + dnode_t *dn = NULL; + dmu_write_by_dnode(dn, 0, 0, NULL, NULL); + ],[ + AC_DEFINE(HAVE_DMU_WRITE_BY_DNODE, 1, + [Have dmu_write_by_dnode() in ZFS]) + ]) + dnl # + dnl # ZFS 0.7.x adds new method dmu_read_by_dnode + dnl # + LB_CHECK_COMPILE([if ZFS has 'dmu_read_by_dnode'], + dmu_read_by_dnode, [ + #include + #include + ],[ + dnode_t *dn = NULL; + dmu_read_by_dnode(dn, 0, 0, NULL, 0); + ],[ + AC_DEFINE(HAVE_DMU_READ_BY_DNODE, 1, + [Have dmu_read_by_dnode() in ZFS]) + ]) ]) AM_CONDITIONAL(ZFS_ENABLED, [test "x$enable_zfs" = xyes]) diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index e8a89a0..4954e2a 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -232,9 +232,8 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, RETURN(rc); } -static int osd_unlinked_object_free(struct osd_device *osd, uint64_t oid); - -static void osd_unlinked_list_emptify(struct osd_device *osd, +static void osd_unlinked_list_emptify(const struct lu_env *env, + struct osd_device *osd, struct list_head *list, bool free) { struct osd_object *obj; @@ -248,7 +247,7 @@ static void 
osd_unlinked_list_emptify(struct osd_device *osd, list_del_init(&obj->oo_unlinked_linkage); if (free) - (void)osd_unlinked_object_free(osd, oid); + (void)osd_unlinked_object_free(env, osd, oid); } } @@ -292,7 +291,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, LASSERT(oh->ot_tx); dmu_tx_abort(oh->ot_tx); osd_object_sa_dirty_rele(oh); - osd_unlinked_list_emptify(osd, &unlinked, false); + osd_unlinked_list_emptify(env, osd, &unlinked, false); /* there won't be any commit, release reserved quota space now, * if any */ qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans); @@ -315,7 +314,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, * by osd_trans_commit_cb already. */ dmu_tx_commit(oh->ot_tx); - osd_unlinked_list_emptify(osd, &unlinked, true); + osd_unlinked_list_emptify(env, osd, &unlinked, true); if (sync) txg_wait_synced(dmu_objset_pool(osd->od_os), txg); @@ -846,7 +845,7 @@ err: static int osd_objset_open(struct osd_device *o) { uint64_t version = ZPL_VERSION; - uint64_t sa_obj; + uint64_t sa_obj, unlink_obj; int rc; ENTRY; @@ -891,7 +890,7 @@ static int osd_objset_open(struct osd_device *o) } rc = -zap_lookup(o->od_os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, - 8, 1, &o->od_unlinkedid); + 8, 1, &unlink_obj); if (rc) { CERROR("%s: lookup for %s failed: rc = %d\n", o->od_svname, ZFS_UNLINKED_SET, rc); @@ -907,6 +906,13 @@ static int osd_objset_open(struct osd_device *o) GOTO(out, rc = -ENOTSUPP); } + rc = __osd_obj2dnode(o->od_os, unlink_obj, &o->od_unlinked); + if (rc) { + CERROR("%s: can't get dnode for unlinked: rc = %d\n", + o->od_svname, rc); + GOTO(out, rc); + } + out: if (rc != 0 && o->od_os != NULL) { dmu_objset_disown(o->od_os, o); @@ -916,9 +922,10 @@ out: RETURN(rc); } -static int -osd_unlinked_object_free(struct osd_device *osd, uint64_t oid) +int osd_unlinked_object_free(const struct lu_env *env, struct osd_device *osd, + uint64_t oid) { + char *key = osd_oti_get(env)->oti_str; int rc; 
dmu_tx_t *tx; @@ -939,7 +946,8 @@ osd_unlinked_object_free(struct osd_device *osd, uint64_t oid) tx = dmu_tx_create(osd->od_os); dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END); - dmu_tx_hold_zap(tx, osd->od_unlinkedid, FALSE, NULL); + osd_tx_hold_zap(tx, osd->od_unlinked->dn_object, osd->od_unlinked, + FALSE, NULL); rc = -dmu_tx_assign(tx, TXG_WAIT); if (rc != 0) { CWARN("%s: Cannot assign tx for %llu: rc = %d\n", @@ -947,7 +955,9 @@ osd_unlinked_object_free(struct osd_device *osd, uint64_t oid) goto failed; } - rc = -zap_remove_int(osd->od_os, osd->od_unlinkedid, oid, tx); + snprintf(key, sizeof(osd_oti_get(env)->oti_str), "%llx", oid); + rc = osd_zap_remove(osd, osd->od_unlinked->dn_object, + osd->od_unlinked, key, tx); if (rc != 0) { CWARN("%s: Cannot remove %llu from unlinked set: rc = %d\n", osd->od_svname, oid, rc); @@ -977,13 +987,13 @@ osd_unlinked_drain(const struct lu_env *env, struct osd_device *osd) zap_cursor_t zc; zap_attribute_t *za = &osd_oti_get(env)->oti_za; - zap_cursor_init(&zc, osd->od_os, osd->od_unlinkedid); + zap_cursor_init(&zc, osd->od_os, osd->od_unlinked->dn_object); while (zap_cursor_retrieve(&zc, za) == 0) { /* If cannot free the object, leave it in the unlinked set, * until the OSD is mounted again when obd_unlinked_drain() * will be called. */ - if (osd_unlinked_object_free(osd, za->za_first_integer) != 0) + if (osd_unlinked_object_free(env, osd, za->za_first_integer)) break; zap_cursor_advance(&zc); } @@ -1045,13 +1055,22 @@ static int osd_mount(const struct lu_env *env, if (rc) GOTO(err, rc); - rc = __osd_obj2dnode(env, o->od_os, o->od_rootid, &rootdn); + rc = __osd_obj2dnode(o->od_os, o->od_rootid, &rootdn); if (rc) GOTO(err, rc); - o->od_root = rootdn->dn_object; osd_dnode_rele(rootdn); + rc = __osd_obj2dnode(o->od_os, DMU_USERUSED_OBJECT, + &o->od_userused_dn); + if (rc) + GOTO(err, rc); + + rc = __osd_obj2dnode(o->od_os, DMU_GROUPUSED_OBJECT, + &o->od_groupused_dn); + if (rc) + GOTO(err, rc); + /* 1. 
initialize oi before any file create or file open */ rc = osd_oi_init(env, o); if (rc) @@ -1108,6 +1127,19 @@ static void osd_umount(const struct lu_env *env, struct osd_device *o) CERROR("%s: lost %d pinned dbuf(s)\n", o->od_svname, atomic_read(&o->od_zerocopy_pin)); + if (o->od_unlinked) { + osd_dnode_rele(o->od_unlinked); + o->od_unlinked = NULL; + } + if (o->od_userused_dn) { + osd_dnode_rele(o->od_userused_dn); + o->od_userused_dn = NULL; + } + if (o->od_groupused_dn) { + osd_dnode_rele(o->od_groupused_dn); + o->od_groupused_dn = NULL; + } + if (o->od_os != NULL) { if (!o->od_dt_dev.dd_rdonly) /* force a txg sync to get all commit callbacks */ diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c index 1dddb40..e3083ef 100644 --- a/lustre/osd-zfs/osd_index.c +++ b/lustre/osd-zfs/osd_index.c @@ -442,9 +442,9 @@ static int osd_dir_lookup(const struct lu_env *env, struct dt_object *dt, } memset(&oti->oti_zde.lzd_fid, 0, sizeof(struct lu_fid)); - rc = -zap_lookup(osd->od_os, obj->oo_dn->dn_object, - (char *)key, 8, sizeof(oti->oti_zde) / 8, - (void *)&oti->oti_zde); + rc = osd_zap_lookup(osd, obj->oo_dn->dn_object, obj->oo_dn, + (char *)key, 8, sizeof(oti->oti_zde) / 8, + (void *)&oti->oti_zde); if (rc != 0) RETURN(rc); @@ -490,7 +490,7 @@ static int osd_declare_dir_insert(const struct lu_env *env, /* do not specify the key as then DMU is trying to look it up * which is very expensive. 
usually the layers above lookup * before insertion */ - dmu_tx_hold_zap(oh->ot_tx, object, TRUE, NULL); + osd_tx_hold_zap(oh->ot_tx, object, obj->oo_dn, TRUE, NULL); osd_idc_find_or_init(env, osd, fid); @@ -629,9 +629,9 @@ static int osd_dir_insert(const struct lu_env *env, struct dt_object *dt, oti->oti_zde.lzd_fid = *fid; /* Insert (key,oid) into ZAP */ - rc = -zap_add(osd->od_os, parent->oo_dn->dn_object, - (char *)key, 8, sizeof(oti->oti_zde) / 8, - (void *)&oti->oti_zde, oh->ot_tx); + rc = osd_zap_add(osd, parent->oo_dn->dn_object, parent->oo_dn, + (char *)key, 8, sizeof(oti->oti_zde) / 8, + (void *)&oti->oti_zde, oh->ot_tx); if (unlikely(rc == -EEXIST && name[0] == '.' && name[1] == '.' && name[2] == 0)) /* Update (key,oid) in ZAP */ @@ -670,7 +670,7 @@ static int osd_declare_dir_delete(const struct lu_env *env, /* do not specify the key as then DMU is trying to look it up * which is very expensive. usually the layers above lookup * before deletion */ - dmu_tx_hold_zap(oh->ot_tx, dnode, FALSE, NULL); + osd_tx_hold_zap(oh->ot_tx, dnode, obj->oo_dn, FALSE, NULL); RETURN(0); } @@ -704,8 +704,8 @@ static int osd_dir_delete(const struct lu_env *env, struct dt_object *dt, } /* Remove key from the ZAP */ - rc = -zap_remove(osd->od_os, zap_dn->dn_object, - (char *) key, oh->ot_tx); + rc = osd_zap_remove(osd, zap_dn->dn_object, zap_dn, + (char *)key, oh->ot_tx); if (unlikely(rc && rc != -ENOENT)) CERROR("%s: zap_remove failed: rc = %d\n", osd->od_svname, rc); @@ -964,8 +964,9 @@ static int osd_dir_it_rec(const struct lu_env *env, const struct dt_it *di, GOTO(out, rc = -EIO); } - rc = -zap_lookup(it->ozi_zc->zc_objset, it->ozi_zc->zc_zapobj, - za->za_name, za->za_integer_length, 3, zde); + rc = osd_zap_lookup(osd_obj2dev(it->ozi_obj), it->ozi_zc->zc_zapobj, + it->ozi_obj->oo_dn, za->za_name, + za->za_integer_length, 3, zde); if (rc) GOTO(out, rc); @@ -1148,12 +1149,11 @@ static int osd_declare_index_insert(const struct lu_env *env, LASSERT(obj->oo_dn); - 
dmu_tx_hold_bonus(oh->ot_tx, obj->oo_dn->dn_object); - /* do not specify the key as then DMU is trying to look it up * which is very expensive. usually the layers above lookup * before insertion */ - dmu_tx_hold_zap(oh->ot_tx, obj->oo_dn->dn_object, TRUE, NULL); + osd_tx_hold_zap(oh->ot_tx, obj->oo_dn->dn_object, obj->oo_dn, + TRUE, NULL); RETURN(0); } @@ -1204,7 +1204,8 @@ static int osd_declare_index_delete(const struct lu_env *env, /* do not specify the key as then DMU is trying to look it up * which is very expensive. usually the layers above lookup * before deletion */ - dmu_tx_hold_zap(oh->ot_tx, obj->oo_dn->dn_object, FALSE, NULL); + osd_tx_hold_zap(oh->ot_tx, obj->oo_dn->dn_object, obj->oo_dn, + FALSE, NULL); RETURN(0); } diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index 5e8a156..4d9656e 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -267,7 +267,7 @@ struct osd_device { /* information about underlying file system */ struct objset *od_os; uint64_t od_rootid; /* id of root znode */ - uint64_t od_unlinkedid; /* id of unlinked zapobj */ + dnode_t *od_unlinked; /* dnode of unlinked zapobj */ /* SA attr mapping->id, * name is the same as in ZFS to use defines SA_ZPL_...*/ sa_attr_type_t *z_attr_table; @@ -295,8 +295,10 @@ struct osd_device { struct lu_site od_site; /* object IDs of the inode accounting indexes */ - uint64_t od_iusr_oid; - uint64_t od_igrp_oid; + uint64_t od_iusr_oid; + uint64_t od_igrp_oid; + dnode_t *od_groupused_dn; + dnode_t *od_userused_dn; /* quota slave instance */ struct qsd_instance *od_quota_slave; @@ -373,7 +375,7 @@ struct osd_object { int osd_statfs(const struct lu_env *, struct dt_device *, struct obd_statfs *); extern const struct dt_index_operations osd_acct_index_ops; -int osd_quota_fid2dmu(const struct lu_fid *fid, uint64_t *oid); +dnode_t *osd_quota_fid2dmu(const struct osd_device *, const struct lu_fid *fid); extern struct lu_device_operations osd_lu_ops; extern 
struct dt_index_operations osd_dir_ops; int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, @@ -382,6 +384,8 @@ int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, bool force); uint64_t osd_objs_count_estimate(uint64_t refdbytes, uint64_t usedobjs, uint64_t nrblocks, uint64_t est_maxblockshift); +int osd_unlinked_object_free(const struct lu_env *env, struct osd_device *osd, + uint64_t oid); /* * Helpers. @@ -485,8 +489,7 @@ int osd_procfs_fini(struct osd_device *osd); /* osd_object.c */ extern char *osd_obj_tag; void osd_object_sa_dirty_rele(struct osd_thandle *oh); -int __osd_obj2dnode(const struct lu_env *env, objset_t *os, - uint64_t oid, dnode_t **dnp); +int __osd_obj2dnode(objset_t *os, uint64_t oid, dnode_t **dnp); struct lu_object *osd_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *d); @@ -507,7 +510,8 @@ void osd_oi_fini(const struct lu_env *env, struct osd_device *o); int osd_fid_lookup(const struct lu_env *env, struct osd_device *, const struct lu_fid *, uint64_t *); uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, char *buf, int bufsize); + const struct lu_fid *fid, char *buf, int bufsize, + dnode_t **zdn); int osd_options_init(void); int osd_ost_seq_exists(const struct lu_env *env, struct osd_device *osd, __u64 seq); @@ -772,4 +776,93 @@ static inline bool osd_dmu_userobj_accounting_available(struct osd_device *osd) } #endif /* #ifdef HAVE_DMU_USEROBJ_ACCOUNTING */ +static inline int osd_zap_add(struct osd_device *osd, uint64_t zap, + dnode_t *dn, const char *key, + int int_size, int int_num, + const void *val, dmu_tx_t *tx) +{ + LASSERT(zap != 0); + +#ifdef HAVE_ZAP_ADD_BY_DNODE + if (dn) + return -zap_add_by_dnode(dn, key, int_size, int_num, val, tx); +#endif + return -zap_add(osd->od_os, zap, key, int_size, int_num, val, tx); +} + +static inline int osd_zap_remove(struct osd_device *osd, uint64_t zap, + 
dnode_t *dn, const char *key, + dmu_tx_t *tx) +{ + LASSERT(zap != 0); + +#ifdef HAVE_ZAP_REMOVE_ADD_BY_DNODE /* symbol configure defines for zap_remove_by_dnode() */ + if (dn) + return -zap_remove_by_dnode(dn, key, tx); +#endif + return -zap_remove(osd->od_os, zap, key, tx); +} + + +static inline int osd_zap_lookup(struct osd_device *osd, uint64_t zap, + dnode_t *dn, const char *key, + int int_size, int int_num, void *v) +{ + LASSERT(zap != 0); + +#ifdef HAVE_ZAP_LOOKUP_BY_DNODE /* guard on the lookup check, not the add check */ + if (dn) + return -zap_lookup_by_dnode(dn, key, int_size, int_num, v); +#endif + return -zap_lookup(osd->od_os, zap, key, int_size, int_num, v); +} + +static inline void osd_tx_hold_zap(dmu_tx_t *tx, uint64_t zap, + dnode_t *dn, int add, const char *name) +{ +#ifdef HAVE_DMU_TX_HOLD_ZAP_BY_DNODE + if (dn) { + dmu_tx_hold_zap_by_dnode(tx, dn, add, name); + return; + } +#endif + dmu_tx_hold_zap(tx, zap, add, name); +} + +static inline void osd_tx_hold_write(dmu_tx_t *tx, uint64_t oid, + dnode_t *dn, uint64_t off, int len) +{ +#ifdef HAVE_DMU_TX_HOLD_WRITE_BY_DNODE /* guard on the hold_write check, not hold_zap */ + if (dn) { + dmu_tx_hold_write_by_dnode(tx, dn, off, len); + return; + } +#endif + dmu_tx_hold_write(tx, oid, off, len); +} + +static inline void osd_dmu_write(struct osd_device *osd, dnode_t *dn, + uint64_t offset, uint64_t size, + const char *buf, dmu_tx_t *tx) +{ + LASSERT(dn); +#ifdef HAVE_DMU_WRITE_BY_DNODE + dmu_write_by_dnode(dn, offset, size, buf, tx); +#else + dmu_write(osd->od_os, dn->dn_object, offset, size, buf, tx); +#endif +} + +static inline int osd_dmu_read(struct osd_device *osd, dnode_t *dn, + uint64_t offset, uint64_t size, + char *buf, int flags) +{ + LASSERT(dn); +#ifdef HAVE_DMU_READ_BY_DNODE + return -dmu_read_by_dnode(dn, offset, size, buf, flags); +#else + return -dmu_read(osd->od_os, dn->dn_object, offset, size, buf, flags); +#endif +} + #endif /* _OSD_INTERNAL_H */ diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c index 1d3b230..94eee7d 100644 --- a/lustre/osd-zfs/osd_io.c +++ b/lustre/osd-zfs/osd_io.c @@ -135,8 +135,8 @@ static ssize_t osd_read(const struct
lu_env *env, struct dt_object *dt, record_start_io(osd, READ, 0); - rc = -dmu_read(osd->od_os, obj->oo_dn->dn_object, *pos, size, - buf->lb_buf, DMU_READ_PREFETCH); + rc = osd_dmu_read(osd, obj->oo_dn, *pos, size, buf->lb_buf, + DMU_READ_PREFETCH); record_end_io(osd, READ, cfs_time_current() - start, size, size >> PAGE_SHIFT); @@ -176,7 +176,7 @@ static ssize_t osd_declare_write(const struct lu_env *env, struct dt_object *dt, if (pos == -1) pos = max_t(loff_t, 256 * 8 * LLOG_MIN_CHUNK_SIZE, obj->oo_attr.la_size + (2 << 20)); - dmu_tx_hold_write(oh->ot_tx, oid, pos, buf->lb_len); + osd_tx_hold_write(oh->ot_tx, oid, obj->oo_dn, pos, buf->lb_len); /* dt_declare_write() is usually called for system objects, such * as llog or last_rcvd files. We needn't enforce quota on those @@ -204,8 +204,8 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); - dmu_write(osd->od_os, obj->oo_dn->dn_object, offset, - (uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx); + osd_dmu_write(osd, obj->oo_dn, offset, (uint64_t)buf->lb_len, + buf->lb_buf, oh->ot_tx); write_lock(&obj->oo_attr_lock); if (obj->oo_attr.la_size < offset + buf->lb_len) { obj->oo_attr.la_size = offset + buf->lb_len; @@ -596,8 +596,8 @@ static int osd_declare_write_commit(const struct lu_env *env, continue; } - dmu_tx_hold_write(oh->ot_tx, obj->oo_dn->dn_object, - offset, size); + osd_tx_hold_write(oh->ot_tx, obj->oo_dn->dn_object, + obj->oo_dn, offset, size); /* Estimating space to be consumed by a write is rather * complicated with ZFS. 
As a consequence, we don't account for * indirect blocks and just use as a rough estimate the worse @@ -611,13 +611,11 @@ static int osd_declare_write_commit(const struct lu_env *env, } if (size) { - dmu_tx_hold_write(oh->ot_tx, obj->oo_dn->dn_object, + osd_tx_hold_write(oh->ot_tx, obj->oo_dn->dn_object, obj->oo_dn, offset, size); space += osd_roundup2blocksz(size, offset, blksz); } - dmu_tx_hold_sa(oh->ot_tx, obj->oo_sa_hdl, 0); - oh->ot_write_commit = 1; /* used in osd_trans_start() for fail_loc */ /* backend zfs filesystem might be configured to store multiple data @@ -774,9 +772,9 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, } if (lnb[i].lnb_page->mapping == (void *)obj) { - dmu_write(osd->od_os, obj->oo_dn->dn_object, - lnb[i].lnb_file_offset, lnb[i].lnb_len, - kmap(lnb[i].lnb_page), oh->ot_tx); + osd_dmu_write(osd, obj->oo_dn, lnb[i].lnb_file_offset, + lnb[i].lnb_len, kmap(lnb[i].lnb_page), + oh->ot_tx); kunmap(lnb[i].lnb_page); } else if (lnb[i].lnb_data) { LASSERT(((unsigned long)lnb[i].lnb_data & 1) == 0); @@ -959,9 +957,6 @@ static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt, read_unlock(&obj->oo_attr_lock); } - /* ... 
and we'll modify size attribute */ - dmu_tx_hold_sa(oh->ot_tx, obj->oo_sa_hdl, 0); - RETURN(osd_declare_quota(env, osd, obj->oo_attr.la_uid, obj->oo_attr.la_gid, 0, oh, true, NULL, false)); diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index 4a1dfe1..e07b982 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -251,10 +251,8 @@ out_sa: RETURN(rc); } -int __osd_obj2dnode(const struct lu_env *env, objset_t *os, - uint64_t oid, dnode_t **dnp) +int __osd_obj2dnode(objset_t *os, uint64_t oid, dnode_t **dnp) { - dmu_object_info_t *doi = &osd_oti_get(env)->oti_doi; dmu_buf_t *db; dmu_buf_impl_t *dbi; int rc; @@ -266,15 +264,7 @@ int __osd_obj2dnode(const struct lu_env *env, objset_t *os, dbi = (dmu_buf_impl_t *)db; DB_DNODE_ENTER(dbi); *dnp = DB_DNODE(dbi); - LASSERT(*dnp != NULL); - dmu_object_info_from_dnode(*dnp, doi); - if (unlikely (oid != DMU_USERUSED_OBJECT && - oid != DMU_GROUPUSED_OBJECT && doi->doi_bonus_type != DMU_OT_SA)) { - osd_dnode_rele(*dnp); - *dnp = NULL; - return -EINVAL; - } return 0; } @@ -408,7 +398,7 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l, rc = osd_fid_lookup(env, osd, lu_object_fid(l), &oid); if (rc == 0) { LASSERT(obj->oo_dn == NULL); - rc = __osd_obj2dnode(env, osd->od_os, oid, &obj->oo_dn); + rc = __osd_obj2dnode(osd->od_os, oid, &obj->oo_dn); /* EEXIST will be returned if object is being deleted in ZFS */ if (rc == -EEXIST) { rc = 0; @@ -498,6 +488,7 @@ static int osd_declare_object_destroy(const struct lu_env *env, struct osd_object *obj = osd_dt_obj(dt); struct osd_device *osd = osd_obj2dev(obj); struct osd_thandle *oh; + dnode_t *dn; int rc; uint64_t zapid; ENTRY; @@ -509,8 +500,8 @@ static int osd_declare_object_destroy(const struct lu_env *env, LASSERT(oh->ot_tx != NULL); /* declare that we'll remove object from fid-dnode mapping */ - zapid = osd_get_name_n_idx(env, osd, fid, NULL, 0); - dmu_tx_hold_zap(oh->ot_tx, zapid, FALSE, NULL); + zapid = 
osd_get_name_n_idx(env, osd, fid, NULL, 0, &dn); + osd_tx_hold_zap(oh->ot_tx, zapid, dn, FALSE, NULL); osd_declare_xattrs_destroy(env, obj, oh); @@ -531,7 +522,8 @@ static int osd_declare_object_destroy(const struct lu_env *env, dmu_tx_hold_free(oh->ot_tx, obj->oo_dn->dn_object, 0, DMU_OBJECT_END); else - dmu_tx_hold_zap(oh->ot_tx, osd->od_unlinkedid, TRUE, NULL); + osd_tx_hold_zap(oh->ot_tx, osd->od_unlinked->dn_object, + osd->od_unlinked, TRUE, NULL); /* will help to find FID->ino when this object is being * added to PENDING/ */ @@ -551,6 +543,7 @@ static int osd_object_destroy(const struct lu_env *env, struct osd_thandle *oh; int rc; uint64_t oid, zapid; + dnode_t *zdn; ENTRY; down_write(&obj->oo_guard); @@ -565,8 +558,9 @@ static int osd_object_destroy(const struct lu_env *env, LASSERT(oh->ot_tx != NULL); /* remove obj ref from index dir (it depends) */ - zapid = osd_get_name_n_idx(env, osd, fid, buf, sizeof(info->oti_str)); - rc = -zap_remove(osd->od_os, zapid, buf, oh->ot_tx); + zapid = osd_get_name_n_idx(env, osd, fid, buf, + sizeof(info->oti_str), &zdn); + rc = osd_zap_remove(osd, zapid, zdn, buf, oh->ot_tx); if (rc) { CERROR("%s: zap_remove(%s) failed: rc = %d\n", osd->od_svname, buf, rc); @@ -597,12 +591,15 @@ static int osd_object_destroy(const struct lu_env *env, CERROR("%s: failed to free %s %llu: rc = %d\n", osd->od_svname, buf, oid, rc); } else { /* asynchronous destroy */ + char *key = info->oti_key; + rc = osd_object_unlinked_add(obj, oh); if (rc) GOTO(out, rc); - rc = -zap_add_int(osd->od_os, osd->od_unlinkedid, - oid, oh->ot_tx); + snprintf(key, sizeof(info->oti_key), "%llx", oid); + rc = osd_zap_add(osd, osd->od_unlinked->dn_object, + osd->od_unlinked, key, 8, 1, &oid, oh->ot_tx); if (rc) CERROR("%s: zap_add_int() failed %s %llu: rc = %d\n", osd->od_svname, buf, oid, rc); @@ -1085,6 +1082,7 @@ static int osd_declare_object_create(const struct lu_env *env, struct osd_device *osd = osd_obj2dev(obj); struct osd_thandle *oh; uint64_t zapid; + 
dnode_t *dn; int rc, dnode_size; ENTRY; @@ -1133,8 +1131,8 @@ static int osd_declare_object_create(const struct lu_env *env, } /* and we'll add it to some mapping */ - zapid = osd_get_name_n_idx(env, osd, fid, NULL, 0); - dmu_tx_hold_zap(oh->ot_tx, zapid, TRUE, NULL); + zapid = osd_get_name_n_idx(env, osd, fid, NULL, 0, &dn); + osd_tx_hold_zap(oh->ot_tx, zapid, dn, TRUE, NULL); /* will help to find FID->ino mapping at dt_insert() */ osd_idc_find_and_init(env, osd, obj); @@ -1242,7 +1240,7 @@ static int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx, } if (unlikely(*dnp == NULL)) - rc = __osd_obj2dnode(env, tx->tx_objset, oid, dnp); + rc = __osd_obj2dnode(tx->tx_objset, oid, dnp); return rc; } @@ -1448,7 +1446,7 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, struct osd_device *osd = osd_obj2dev(obj); char *buf = info->oti_str; struct osd_thandle *oh; - dnode_t *dn = NULL; + dnode_t *dn = NULL, *zdn = NULL; uint64_t zapid, parent = 0; int rc; @@ -1491,9 +1489,9 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, zde->zde_dnode = dn->dn_object; zde->zde_type = IFTODT(attr->la_mode & S_IFMT); - zapid = osd_get_name_n_idx(env, osd, fid, buf, sizeof(info->oti_str)); - - rc = -zap_add(osd->od_os, zapid, buf, 8, 1, zde, oh->ot_tx); + zapid = osd_get_name_n_idx(env, osd, fid, buf, + sizeof(info->oti_str), &zdn); + rc = osd_zap_add(osd, zapid, zdn, buf, 8, 1, zde, oh->ot_tx); if (rc) GOTO(out, rc); obj->oo_dn = dn; diff --git a/lustre/osd-zfs/osd_oi.c b/lustre/osd-zfs/osd_oi.c index 230d103..7f437ca 100644 --- a/lustre/osd-zfs/osd_oi.c +++ b/lustre/osd-zfs/osd_oi.c @@ -434,7 +434,7 @@ static void osd_fid2str(char *buf, const struct lu_fid *fid) */ static uint64_t osd_get_idx_for_fid(struct osd_device *osd, const struct lu_fid *fid, - char *buf) + char *buf, dnode_t **zdn) { struct osd_oi *oi; @@ -442,16 +442,21 @@ osd_get_idx_for_fid(struct osd_device *osd, const struct lu_fid *fid, oi = 
osd->od_oi_table[fid_seq(fid) & (osd->od_oi_count - 1)]; if (buf) osd_fid2str(buf, fid); + if (zdn) + *zdn = oi->oi_dn; return oi->oi_zapid; } uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, char *buf, int bufsize) + const struct lu_fid *fid, char *buf, int bufsize, + dnode_t **zdn) { uint64_t zapid; LASSERT(fid); + if (zdn != NULL) + *zdn = NULL; if (fid_is_on_ost(env, osd, fid) == 1 || fid_seq(fid) == FID_SEQ_ECHO) { zapid = osd_get_idx_for_ost_obj(env, osd, fid, buf, bufsize); @@ -466,10 +471,10 @@ uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, if (fid_is_acct(fid)) zapid = MASTER_NODE_OBJ; } else { - zapid = osd_get_idx_for_fid(osd, fid, buf); + zapid = osd_get_idx_for_fid(osd, fid, buf, NULL); } } else { - zapid = osd_get_idx_for_fid(osd, fid, buf); + zapid = osd_get_idx_for_fid(osd, fid, buf, zdn); } return zapid; @@ -507,7 +512,8 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev, { struct osd_thread_info *info = osd_oti_get(env); char *buf = info->oti_buf; - uint64_t zapid; + dnode_t *zdn; + uint64_t zapid; int rc = 0; ENTRY; @@ -522,9 +528,9 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev, *oid = dev->od_root; } else { zapid = osd_get_name_n_idx(env, dev, fid, buf, - sizeof(info->oti_buf)); - rc = -zap_lookup(dev->od_os, zapid, buf, - 8, 1, &info->oti_zde); + sizeof(info->oti_buf), &zdn); + rc = osd_zap_lookup(dev, zapid, zdn, buf, + 8, 1, &info->oti_zde); if (rc) RETURN(rc); *oid = info->oti_zde.lzd_reg.zde_dnode; @@ -580,7 +586,7 @@ osd_oi_add_table(const struct lu_env *env, struct osd_device *o, } o->od_oi_table[key] = oi; - __osd_obj2dnode(env, o->od_os, oi->oi_zapid, &oi->oi_dn); + __osd_obj2dnode(o->od_os, oi->oi_zapid, &oi->oi_dn); return 0; } diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c index 14ba2cd..9a425e7 100644 --- a/lustre/osd-zfs/osd_quota.c +++ b/lustre/osd-zfs/osd_quota.c @@ -34,23 
+34,13 @@ /** * Helper function to retrieve DMU object id from fid for accounting object */ -inline int osd_quota_fid2dmu(const struct lu_fid *fid, uint64_t *oid) +dnode_t *osd_quota_fid2dmu(const struct osd_device *osd, + const struct lu_fid *fid) { - int rc = 0; - LASSERT(fid_is_acct(fid)); - switch (fid_oid(fid)) { - case ACCT_GROUP_OID: - *oid = DMU_GROUPUSED_OBJECT; - break; - case ACCT_USER_OID: - *oid = DMU_USERUSED_OBJECT; - break; - default: - rc = -EINVAL; - break; - } - return rc; + if (fid_oid(fid) == ACCT_GROUP_OID) + return osd->od_groupused_dn; + return osd->od_userused_dn; } /** @@ -108,24 +98,22 @@ static int osd_acct_index_lookup(const struct lu_env *env, struct osd_object *obj = osd_dt_obj(dtobj); struct osd_device *osd = osd_obj2dev(obj); int rc; - uint64_t oid = 0; + dnode_t *dn; ENTRY; rec->bspace = rec->ispace = 0; /* convert the 64-bit uid/gid into a string */ snprintf(buf, buflen, "%llx", *((__u64 *)dtkey)); - /* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be + /* fetch DMU object (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be * used */ - rc = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu), &oid); - if (rc) - RETURN(rc); + dn = osd_quota_fid2dmu(osd, lu_object_fid(&dtobj->do_lu)); /* disk usage (in bytes) is maintained by DMU. * DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which * not associated with any dmu_but_t (see dnode_special_open()). 
*/ - rc = zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1, - &rec->bspace); + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, sizeof(uint64_t), 1, + &rec->bspace); if (rc == -ENOENT) { /* user/group has not created anything yet */ CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n", @@ -142,8 +130,8 @@ static int osd_acct_index_lookup(const struct lu_env *env, } else { snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx", *((__u64 *)dtkey)); - rc = zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1, - &rec->ispace); + rc = osd_zap_lookup(osd, dn->dn_object, dn, buf, + sizeof(uint64_t), 1, &rec->ispace); if (rc == -ENOENT) { CDEBUG(D_QUOTA, "%s: id %s not found dnode accounting\n", @@ -170,7 +158,8 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, struct osd_it_quota *it; struct lu_object *lo = &dt->do_lu; struct osd_device *osd = osd_dev(lo->lo_dev); - int rc; + dnode_t *dn; + int rc; ENTRY; LASSERT(lu_object_exists(lo)); @@ -183,9 +172,8 @@ static struct dt_it *osd_it_acct_init(const struct lu_env *env, RETURN(ERR_PTR(-ENOMEM)); memset(it, 0, sizeof(*it)); - rc = osd_quota_fid2dmu(lu_object_fid(lo), &it->oiq_oid); - if (rc) - RETURN(ERR_PTR(rc)); + dn = osd_quota_fid2dmu(osd, lu_object_fid(lo)); + it->oiq_oid = dn->dn_object; /* initialize zap cursor */ rc = osd_zap_cursor_init(&it->oiq_zc, osd->od_os, it->oiq_oid, 0); @@ -283,15 +271,19 @@ static int osd_it_acct_key_size(const struct lu_env *env, * to read bytes we need to call zap_lookup explicitly. 
*/ static int osd_zap_cursor_retrieve_value(const struct lu_env *env, - zap_cursor_t *zc, char *buf, - int buf_size, int *bytes_read) + struct osd_it_quota *it, + char *buf, int buf_size, + int *bytes_read) { + const struct lu_fid *fid = lu_object_fid(&it->oiq_obj->oo_dt.do_lu); zap_attribute_t *za = &osd_oti_get(env)->oti_za; + zap_cursor_t *zc = it->oiq_zc; + struct osd_device *osd = osd_obj2dev(it->oiq_obj); int rc, actual_size; rc = -zap_cursor_retrieve(zc, za); if (unlikely(rc != 0)) - return -rc; + return rc; if (unlikely(za->za_integer_length <= 0)) return -ERANGE; @@ -305,10 +297,10 @@ static int osd_zap_cursor_retrieve_value(const struct lu_env *env, buf_size = za->za_num_integers; } - rc = -zap_lookup(zc->zc_objset, zc->zc_zapobj, - za->za_name, za->za_integer_length, - buf_size, buf); - + /* use correct special ID to request bytes used */ + rc = osd_zap_lookup(osd, fid_oid(fid) == ACCT_GROUP_OID ? + DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT, NULL, + za->za_name, za->za_integer_length, buf_size, buf); if (likely(rc == 0)) *bytes_read = actual_size; @@ -339,8 +331,7 @@ static int osd_it_acct_rec(const struct lu_env *env, rec->ispace = rec->bspace = 0; /* retrieve block usage from the DMU accounting object */ - rc = osd_zap_cursor_retrieve_value(env, it->oiq_zc, - (char *)&rec->bspace, + rc = osd_zap_cursor_retrieve_value(env, it, (char *)&rec->bspace, sizeof(uint64_t), &bytes_read); if (rc) RETURN(rc); @@ -359,8 +350,9 @@ static int osd_it_acct_rec(const struct lu_env *env, /* inode accounting is not maintained by DMU, so we use our own ZAP to * track inode usage */ - rc = -zap_lookup(osd->od_os, it->oiq_obj->oo_dn->dn_object, - za->za_name, sizeof(uint64_t), 1, &rec->ispace); + rc = osd_zap_lookup(osd, it->oiq_obj->oo_dn->dn_object, + it->oiq_obj->oo_dn, za->za_name, sizeof(uint64_t), + 1, &rec->ispace); if (rc == -ENOENT) /* user/group has not created any file yet */ CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n", diff --git 
a/lustre/osd-zfs/osd_xattr.c b/lustre/osd-zfs/osd_xattr.c index c220e0e..f643f8a 100644 --- a/lustre/osd-zfs/osd_xattr.c +++ b/lustre/osd-zfs/osd_xattr.c @@ -148,7 +148,7 @@ int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd, if (rc) return rc; - rc = __osd_obj2dnode(env, osd->od_os, xa_data_obj, &xa_data_dn); + rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn); if (rc) return rc; @@ -506,7 +506,7 @@ __osd_xattr_set(const struct lu_env *env, struct osd_object *obj, * Entry already exists. * We'll truncate the existing object. */ - rc = __osd_obj2dnode(env, osd->od_os, xa_data_obj, &xa_data_dn); + rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn); if (rc) goto out;