Whamcloud - gitweb
LU-10463 osd-zfs: use 1MB RPC size by default 57/30757/3
author Andreas Dilger <andreas.dilger@intel.com>
Sat, 6 Jan 2018 01:39:06 +0000 (18:39 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Sat, 20 Jan 2018 06:19:14 +0000 (06:19 +0000)
Revert back to using 1MB RPC size for ZFS back-end storage, if it
is not otherwise specified, and as long as the ZFS recordsize is
1MB or smaller.  Continue to use the ZFS recordsize if it is larger.

For ldiskfs, continue to use 4MB RPC size, unless the bigalloc
feature is enabled and has a larger chunksize.

Testing has shown that while 4MB RPC size is good for ldiskfs, it
does not improve ZFS performance, and increases IO variability in
some cases.

Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Change-Id: I4b306843667bfd960ad07ecc3886a696fd3ebbe5
Reviewed-on: https://review.whamcloud.com/30757
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/dt_object.h
lustre/ofd/ofd_dev.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-zfs/osd_handler.c

index a10f4a9..2bf129e 100644 (file)
@@ -84,7 +84,8 @@ struct dt_device_param {
        unsigned           ddp_max_extent_blks;
        /* per-extent insertion overhead to be used by client for grant
         * calculation */
-       unsigned           ddp_extent_tax;
+       unsigned int       ddp_extent_tax;
+       unsigned int       ddp_brw_size;        /* optimal RPC size */
 };
 
 /**
index 73df3d4..f7eb89c 100644 (file)
@@ -2952,11 +2952,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
 
        tgd->tgd_reserved_pcnt = 0;
 
-       if (DT_DEF_BRW_SIZE < (1U << tgd->tgd_blockbits))
-               m->ofd_brw_size = 1U << tgd->tgd_blockbits;
-       else
-               m->ofd_brw_size = DT_DEF_BRW_SIZE;
-
+       m->ofd_brw_size = m->ofd_lut.lut_dt_conf.ddp_brw_size;
        m->ofd_cksum_types_supported = cksum_types_supported_server();
        m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT;
        if (tgd->tgd_osfs.os_bsize * tgd->tgd_osfs.os_blocks <
index 8e0ef0c..14a48f9 100644 (file)
@@ -2160,6 +2160,16 @@ static void osd_conf_get(const struct lu_env *env,
        else
 #endif
                param->ddp_max_ea_size = sb->s_blocksize - ea_overhead;
+
+       /* Preferred RPC size for efficient disk IO.  4MB shows good
+        * all-around performance for ldiskfs, but use bigalloc chunk size
+        * by default if larger. */
+#if defined(LDISKFS_CLUSTER_SIZE)
+       if (LDISKFS_CLUSTER_SIZE(sb) > DT_DEF_BRW_SIZE)
+               param->ddp_brw_size = LDISKFS_CLUSTER_SIZE(sb);
+       else
+#endif
+               param->ddp_brw_size = DT_DEF_BRW_SIZE;
 }
 
 /*
index 724c3e9..2575460 100644 (file)
@@ -608,6 +608,14 @@ static void osd_conf_get(const struct lu_env *env,
        param->ddp_max_extent_blks =
                (1 << (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT));
        param->ddp_extent_tax = osd_blk_insert_cost(osd);
+
+       /* Preferred RPC size for efficient disk IO.  1MB shows good
+        * all-around performance for ZFS, but use blocksize (recordsize)
+        * by default if larger to avoid read-modify-write. */
+       if (osd->od_max_blksz > ONE_MB_BRW_SIZE)
+               param->ddp_brw_size = osd->od_max_blksz;
+       else
+               param->ddp_brw_size = ONE_MB_BRW_SIZE;
 }
 
 /*