From 9765c6174ef580fb4deef4e7faea6d5ed634b00f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 20 May 2016 15:57:09 -0700 Subject: [PATCH] LU-8068 osd-zfs: large dnode support In ZFS 0.7.x, support for large dnodes has been added. This allows Lustre to optionally specify the size of a dnode which ZFS will then use to store metadata such as xattrs. The default dnode size specified by the 'dnodesize' property on the dataset will be used unless a specific value is provided. Signed-off-by: Brian Behlendorf Signed-off-by: Ned Bass Change-Id: Iaa7211ca50e7089af4b1fabcccdaa6e16477e641 Reviewed-on: http://review.whamcloud.com/20367 Tested-by: Jenkins Reviewed-by: Alex Zhuravlev Reviewed-by: Nathaniel Clark Tested-by: Maloo Reviewed-by: Oleg Drokin --- config/lustre-build-zfs.m4 | 29 ++++++++++++++++++++++++++++ lustre/osd-zfs/osd_internal.h | 45 +++++++++++++++++++++++++++++++++++++++++++ lustre/osd-zfs/osd_object.c | 15 +++++++-------- 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/config/lustre-build-zfs.m4 b/config/lustre-build-zfs.m4 index 611e09d..3aa9314 100644 --- a/config/lustre-build-zfs.m4 +++ b/config/lustre-build-zfs.m4 @@ -450,6 +450,35 @@ your distribution. AC_DEFINE(HAVE_SPA_MAXBLOCKSIZE, 1, [Have spa_maxblocksize in ZFS]) ]) + dnl # + dnl # ZFS 0.7.x adds support for large dnodes. This + dnl # allows Lustre to optionally specify the size of a + dnl # dnode which ZFS will then use to store metadata such + dnl # as xattrs. The default dnode size specified by the + dnl # 'dnodesize' dataset property will be used unless a + dnl # specific value is provided. 
+ dnl # + LB_CHECK_COMPILE([if zfs defines dmu_object_alloc_dnsize], + dmu_object_alloc_dnsize, [ + #include <sys/dmu.h> + #include <sys/dnode.h> + ],[ + objset_t *os = NULL; + dmu_object_type_t objtype = DMU_OT_NONE; + int blocksize = 0; + dmu_object_type_t bonustype = DMU_OT_SA; + int dnodesize = DNODE_MIN_SIZE; + dmu_tx_t *tx = NULL; + uint64_t id; + + id = dmu_object_alloc_dnsize(os, objtype, blocksize, + bonustype, + DN_BONUS_SIZE(dnodesize), + dnodesize, tx); + ],[ + AC_DEFINE(HAVE_DMU_OBJECT_ALLOC_DNSIZE, 1, + [Have dmu_object_alloc_dnsize in ZFS]) + ]) ]) AM_CONDITIONAL(ZFS_ENABLED, [test "x$enable_zfs" = xyes]) diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index a59ff87..e3daa1fe 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -595,4 +595,49 @@ osd_zio_buf_free(void *buf, size_t size) #define osd_zio_buf_free(buf, size) zio_buf_free(buf, size) #endif +#ifdef HAVE_DMU_OBJECT_ALLOC_DNSIZE +static inline uint64_t +osd_dmu_object_alloc(objset_t *os, dmu_object_type_t objtype, int blocksize, + int dnodesize, dmu_tx_t *tx) +{ + if (dnodesize == 0) + dnodesize = MAX(dmu_objset_dnodesize(os), DNODE_MIN_SIZE); + + return dmu_object_alloc_dnsize(os, objtype, blocksize, DMU_OT_SA, + DN_BONUS_SIZE(dnodesize), dnodesize, tx); +} + +static inline uint64_t +osd_zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, + int indirect_blockshift, int dnodesize, dmu_tx_t *tx) +{ + if (dnodesize == 0) + dnodesize = MAX(dmu_objset_dnodesize(os), DNODE_MIN_SIZE); + + return zap_create_flags_dnsize(os, normflags, flags, ot, + leaf_blockshift, indirect_blockshift, + DMU_OT_SA, DN_BONUS_SIZE(dnodesize), + dnodesize, tx); +} +#else +static inline uint64_t +osd_dmu_object_alloc(objset_t *os, dmu_object_type_t objtype, int blocksize, + int dnodesize, dmu_tx_t *tx) +{ + return dmu_object_alloc(os, objtype, blocksize, DMU_OT_SA, + DN_MAX_BONUSLEN, tx); +} + +static inline uint64_t 
+osd_zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, + int indirect_blockshift, int dnodesize, dmu_tx_t *tx) +{ + return zap_create_flags(os, normflags, flags, ot, leaf_blockshift, + indirect_blockshift, DMU_OT_SA, + DN_MAX_BONUSLEN, tx); +} +#endif /* HAVE_DMU_OBJECT_ALLOC_DNSIZE */ + #endif /* _OSD_INTERNAL_H */ diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index 2575ea4..b534906 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -1293,9 +1293,8 @@ int __osd_object_create(const struct lu_env *env, struct osd_object *obj, fid_seq_is_local_file(fid_seq(fid)))) type = DMU_OTN_UINT8_METADATA; - /* Create a new DMU object. */ - oid = dmu_object_alloc(osd->od_os, type, 0, - DMU_OT_SA, DN_MAX_BONUSLEN, tx); + /* Create a new DMU object using the default dnode size. */ + oid = osd_dmu_object_alloc(osd->od_os, type, 0, 0, tx); rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, dbp); LASSERTF(rc == 0, "sa_buf_hold "LPU64" failed: %d\n", oid, rc); @@ -1335,11 +1334,11 @@ int __osd_zap_create(const struct lu_env *env, struct osd_device *osd, transaction group. */ LASSERT(tx->tx_txg != 0); - oid = zap_create_flags(osd->od_os, 0, flags | ZAP_FLAG_HASH64, - DMU_OT_DIRECTORY_CONTENTS, - 14, /* == ZFS fzap_default_block_shift */ - DN_MAX_INDBLKSHIFT, /* indirect block shift */ - DMU_OT_SA, DN_MAX_BONUSLEN, tx); + oid = osd_zap_create_flags(osd->od_os, 0, flags | ZAP_FLAG_HASH64, + DMU_OT_DIRECTORY_CONTENTS, + 14, /* == ZFS fzap_default_blockshift */ + DN_MAX_INDBLKSHIFT, /* indirect blockshift */ + 0, tx); rc = -sa_buf_hold(osd->od_os, oid, osd_obj_tag, zap_dbp); if (rc) -- 1.8.3.1