Whamcloud - gitweb
LU-10041 osd: osd-zfs to choose dnode size 42/29242/3
author Alex Zhuravlev <bzzz@whamcloud.com>
Thu, 28 Sep 2017 11:30:55 +0000 (14:30 +0300)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 25 Oct 2017 02:17:26 +0000 (02:17 +0000)
Depending on the dataset's dnodesize property, the dnode size can be:
legacy (512 bytes), auto (512 bytes to 16K), or an absolute
size (512, 1024, 2048, 4096, 8192, 16384).

Change-Id: Iea35d8ae850523440272467320410850821f484c
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/29242
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
lustre/osd-zfs/osd_handler.c
lustre/osd-zfs/osd_internal.h
lustre/osd-zfs/osd_object.c
lustre/osd-zfs/osd_xattr.c

index 25ffbc6..974a0a2 100644 (file)
@@ -783,6 +783,13 @@ static void osd_readonly_changed_cb(void *arg, uint64_t newval)
        osd->od_prop_rdonly = !!newval;
 }
 
+/* Property-change callback for "dnodesize": store the new value in the
+ * osd_device handed over as @arg so later object creation can consult it. */
+static void osd_dnodesize_changed_cb(void *arg, uint64_t newval)
+{
+       ((struct osd_device *)arg)->od_dnsize = newval;
+}
+
 /*
  * This function unregisters all registered callbacks.  It's harmless to
  * unregister callbacks that were never registered so it is used to safely
@@ -798,6 +805,8 @@ static void osd_objset_unregister_callbacks(struct osd_device *o)
                                   osd_recordsize_changed_cb, o);
        (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
                                   osd_readonly_changed_cb, o);
+       (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DNODESIZE),
+                                  osd_dnodesize_changed_cb, o);
 
        if (o->arc_prune_cb != NULL) {
                arc_remove_prune_callback(o->arc_prune_cb);
@@ -834,6 +843,11 @@ static int osd_objset_register_callbacks(struct osd_device *o)
        if (rc)
                GOTO(err, rc);
 
+       rc = -dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DNODESIZE),
+                               osd_dnodesize_changed_cb, o);
+       if (rc)
+               GOTO(err, rc);
+
        o->arc_prune_cb = arc_add_prune_callback(arc_prune_func, o);
 err:
        dsl_pool_config_exit(dp, FTAG);
index 5091a59..503249e 100644 (file)
@@ -290,6 +290,7 @@ struct osd_device {
                                 od_xattr_in_sa:1,
                                 od_is_ost:1,
                                 od_posix_acl:1;
+       unsigned int             od_dnsize;
 
        char                     od_mntdev[128];
        char                     od_svname[128];
@@ -502,7 +503,7 @@ int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type,
                         void *buf, uint32_t buflen, struct osd_thandle *oh);
 int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
                     dnode_t **zap_dnp, dmu_tx_t *tx, struct lu_attr *la,
-                    zap_flags_t flags);
+                    unsigned dnsize, zap_flags_t flags);
 int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
                        dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la);
 int __osd_attr_init(const struct lu_env *env, struct osd_device *osd,
index cceeecf..6eb175e 100644 (file)
@@ -1309,6 +1309,43 @@ static int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
        return rc;
 }
 
+#ifdef HAVE_DMU_OBJECT_ALLOC_DNSIZE
+/* Choose the dnode size for a new object from the cached od_dnsize value:
+ * "auto" doubles from DNODE_MIN_SIZE until the bonus area can hold
+ * oo_ea_in_bonus + 32 bytes (capped at DNODE_MAX_SIZE); a fixed 1K..16K
+ * setting maps to that byte count; any other value gives DNODE_MIN_SIZE. */
+static int osd_find_dnsize(struct osd_object *obj)
+{
+       struct osd_device *osd = osd_obj2dev(obj);
+       int dnsize = DNODE_MIN_SIZE;
+
+       if (osd->od_dnsize == ZFS_DNSIZE_AUTO) {
+               while (DN_BONUS_SIZE(dnsize) < obj->oo_ea_in_bonus + 32 &&
+                      dnsize < DNODE_MAX_SIZE)
+                       dnsize <<= 1;
+               if (dnsize > DNODE_MAX_SIZE)
+                       dnsize = DNODE_MAX_SIZE;
+       } else if (osd->od_dnsize == ZFS_DNSIZE_1K) {
+               dnsize = 1024;
+       } else if (osd->od_dnsize == ZFS_DNSIZE_2K) {
+               dnsize = 2048;
+       } else if (osd->od_dnsize == ZFS_DNSIZE_4K) {
+               dnsize = 4096;
+       } else if (osd->od_dnsize == ZFS_DNSIZE_8K) {
+               dnsize = 8192;
+       } else if (osd->od_dnsize == ZFS_DNSIZE_16K) {
+               dnsize = 16384;
+       }
+       return dnsize;
+}
+#else
+/* NOTE(review): a bonus length, not a dnode size; presumably unused here. */
+static inline int osd_find_dnsize(struct osd_object *obj)
+{
+       return DN_MAX_BONUSLEN;
+}
+#endif
+
 /*
  * The transaction passed to this routine must have
  * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
@@ -1329,7 +1366,8 @@ int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
                type = DMU_OTN_UINT8_METADATA;
 
        /* Create a new DMU object using the default dnode size. */
-       oid = osd_dmu_object_alloc(osd->od_os, type, 0, 0, tx);
+       oid = osd_dmu_object_alloc(osd->od_os, type, 0,
+                                  osd_find_dnsize(obj), tx);
 
        LASSERT(la->la_valid & LA_MODE);
        la->la_size = 0;
@@ -1350,7 +1388,7 @@ int __osd_object_create(const struct lu_env *env, struct osd_object *obj,
  * a conversion from the different internal ZAP hash formats being used. */
 int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
                     dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la,
-                    zap_flags_t flags)
+                    unsigned dnsize, zap_flags_t flags)
 {
        uint64_t oid;
 
@@ -1363,7 +1401,7 @@ int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
                                   DMU_OT_DIRECTORY_CONTENTS,
                                   14, /* == ZFS fzap_default_blockshift */
                                   DN_MAX_INDBLKSHIFT, /* indirect blockshift */
-                                  0, tx);
+                                  dnsize, tx);
 
        la->la_size = 2;
        la->la_nlink = 1;
@@ -1383,7 +1421,7 @@ static dnode_t *osd_mkidx(const struct lu_env *env, struct osd_object *obj,
         * binary keys */
        LASSERT(S_ISREG(la->la_mode));
        rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la,
-                             ZAP_FLAG_UINT64_KEY);
+                             osd_find_dnsize(obj), ZAP_FLAG_UINT64_KEY);
        if (rc)
                return ERR_PTR(rc);
        return dn;
@@ -1396,7 +1434,8 @@ static dnode_t *osd_mkdir(const struct lu_env *env, struct osd_object *obj,
        int rc;
 
        LASSERT(S_ISDIR(la->la_mode));
-       rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la, 0);
+       rc = __osd_zap_create(env, osd_obj2dev(obj), &dn, oh->ot_tx, la,
+                             osd_find_dnsize(obj), 0);
        if (rc)
                return ERR_PTR(rc);
        return dn;
index 98032bc..cce0e99 100644 (file)
@@ -579,7 +579,7 @@ __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
 
                la->la_valid = LA_MODE;
                la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-               rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0);
+               rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
                if (rc)
                        return rc;