From d993de7b40cd9625b48e5361ae29f77bfceb207a Mon Sep 17 00:00:00 2001
From: Alex Zhuravlev
Date: Thu, 28 Sep 2017 14:30:55 +0300
Subject: [PATCH] LU-10041 osd: osd-zfs to choose dnode size

depending on dnodesize property it can be:
legacy (512 bytes), auto (512 bytes to 16K) or
absolute size (512, 1024, 2048, 4096, 8192, 16384).

Change-Id: Iea35d8ae850523440272467320410850821f484c
Signed-off-by: Alex Zhuravlev
Reviewed-on: https://review.whamcloud.com/29242
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Nathaniel Clark
---
 lustre/osd-zfs/osd_handler.c  | 14 +++++++++++++
 lustre/osd-zfs/osd_internal.h |  3 ++-
 lustre/osd-zfs/osd_object.c   | 49 ++++++++++++++++++++++++++++++++++++++-----
 lustre/osd-zfs/osd_xattr.c    |  2 +-
 4 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c
index 25ffbc6..974a0a2 100644
--- a/lustre/osd-zfs/osd_handler.c
+++ b/lustre/osd-zfs/osd_handler.c
@@ -783,6 +783,13 @@ static void osd_readonly_changed_cb(void *arg, uint64_t newval)
 	osd->od_prop_rdonly = !!newval;
 }
 
+static void osd_dnodesize_changed_cb(void *arg, uint64_t newval)
+{
+	struct osd_device *osd = arg;
+
+	osd->od_dnsize = newval;
+}
+
 /*
  * This function unregisters all registered callbacks.  It's harmless to
  * unregister callbacks that were never registered so it is used to safely
@@ -798,6 +805,8 @@ static void osd_objset_unregister_callbacks(struct osd_device *o)
 				   osd_recordsize_changed_cb, o);
 	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
 				   osd_readonly_changed_cb, o);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DNODESIZE),
+				   osd_dnodesize_changed_cb, o);
 
 	if (o->arc_prune_cb != NULL) {
 		arc_remove_prune_callback(o->arc_prune_cb);
@@ -834,6 +843,11 @@ static int osd_objset_register_callbacks(struct osd_device *o)
 	if (rc)
 		GOTO(err, rc);
 
+	rc = -dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DNODESIZE),
+				osd_dnodesize_changed_cb, o);
+	if (rc)
+		GOTO(err, rc);
+
 	o->arc_prune_cb = arc_add_prune_callback(arc_prune_func, o);
 err:
 	dsl_pool_config_exit(dp, FTAG);
diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h
index 5091a59..503249e 100644
--- a/lustre/osd-zfs/osd_internal.h
+++ b/lustre/osd-zfs/osd_internal.h
@@ -290,6 +290,7 @@ struct osd_device {
 				 od_xattr_in_sa:1,
 				 od_is_ost:1,
 				 od_posix_acl:1;
+	unsigned int		 od_dnsize;
 
 	char			 od_mntdev[128];
 	char			 od_svname[128];
@@ -502,7 +503,7 @@ int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type,
 			 void *buf, uint32_t buflen, struct osd_thandle *oh);
 int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
 		     dnode_t **zap_dnp, dmu_tx_t *tx, struct lu_attr *la,
-		     zap_flags_t flags);
+		     unsigned dnsize, zap_flags_t flags);
 int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
 			dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la);
 int __osd_attr_init(const struct lu_env *env, struct osd_device *osd,
diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c
index cceeecf..6eb175e 100644
--- a/lustre/osd-zfs/osd_object.c
+++ b/lustre/osd-zfs/osd_object.c
@@ -1309,6 +1309,43 @@ static int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
 	return rc;
 }
 
+#ifdef HAVE_DMU_OBJECT_ALLOC_DNSIZE
+static int osd_find_dnsize(struct osd_object *obj)
+{
+	struct osd_device *osd = osd_obj2dev(obj);
+	int dnsize;
+
+	if (osd->od_dnsize == ZFS_DNSIZE_AUTO) {
+		dnsize = DNODE_MIN_SIZE;
+		do {
+			if (DN_BONUS_SIZE(dnsize) >= obj->oo_ea_in_bonus + 32)
+				break;
+			dnsize <<= 1;
+		} while (dnsize < DNODE_MAX_SIZE);
+		if (dnsize > DNODE_MAX_SIZE)
+			dnsize = DNODE_MAX_SIZE;
+	} else if (osd->od_dnsize == ZFS_DNSIZE_1K) {
+		dnsize = 1024;
+	} else if (osd->od_dnsize == ZFS_DNSIZE_2K) {
+		dnsize = 2048;
+	} else if (osd->od_dnsize == ZFS_DNSIZE_4K) {
+		dnsize = 4096;
+	} else if (osd->od_dnsize == ZFS_DNSIZE_8K) {
+		dnsize = 8192;
+	} else if (osd->od_dnsize == ZFS_DNSIZE_16K) {
+		dnsize = 16384;
+	} else {
+		dnsize = DNODE_MIN_SIZE; /* legacy or unknown setting */
+	}
+	return dnsize;
+}
+#else
+static inline int osd_find_dnsize(struct osd_object *obj)
+{
+	return DN_MAX_BONUSLEN;
+}
+#endif
+
 /*
  * The transaction passed to this routine must have
  * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
@@ -1329,7 +1366,8 @@ int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
 		type = DMU_OTN_UINT8_METADATA;
 
 	/* Create a new DMU object using the default dnode size. */
-	oid = osd_dmu_object_alloc(osd->od_os, type, 0, 0, tx);
+	oid = osd_dmu_object_alloc(osd->od_os, type, 0,
+				   osd_find_dnsize(obj), tx);
 
 	LASSERT(la->la_valid & LA_MODE);
 	la->la_size = 0;
@@ -1350,7 +1388,7 @@ int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
  * a conversion from the different internal ZAP hash formats being used. */
 int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
 		     dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la,
-		     zap_flags_t flags)
+		     unsigned dnsize, zap_flags_t flags)
 {
 	uint64_t oid;
 
@@ -1363,7 +1401,7 @@ int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
 				  DMU_OT_DIRECTORY_CONTENTS,
 				  14, /* == ZFS fzap_default_blockshift */
 				  DN_MAX_INDBLKSHIFT, /* indirect blockshift */
-				  0, tx);
+				  dnsize, tx);
 
 	la->la_size = 2;
 	la->la_nlink = 1;
@@ -1383,7 +1421,7 @@ static dnode_t *osd_mkidx(const struct lu_env *env, struct osd_object *obj,
 	 * binary keys */
 	LASSERT(S_ISREG(la->la_mode));
 	rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la,
-			      ZAP_FLAG_UINT64_KEY);
+			      osd_find_dnsize(obj), ZAP_FLAG_UINT64_KEY);
 	if (rc)
 		return ERR_PTR(rc);
 	return dn;
@@ -1396,7 +1434,8 @@ static dnode_t *osd_mkdir(const struct lu_env *env, struct osd_object *obj,
 	int rc;
 
 	LASSERT(S_ISDIR(la->la_mode));
-	rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la, 0);
+	rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la,
+			      osd_find_dnsize(obj), 0);
 	if (rc)
 		return ERR_PTR(rc);
 	return dn;
diff --git a/lustre/osd-zfs/osd_xattr.c b/lustre/osd-zfs/osd_xattr.c
index 98032bc..cce0e99 100644
--- a/lustre/osd-zfs/osd_xattr.c
+++ b/lustre/osd-zfs/osd_xattr.c
@@ -579,7 +579,7 @@ __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
 
 		la->la_valid = LA_MODE;
 		la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-		rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0);
+		rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
 		if (rc)
 			return rc;
 
-- 
1.8.3.1