From af34a876d2ebde2b4717c920683c7fc8b5eae1cf Mon Sep 17 00:00:00 2001
From: Andreas Dilger
Date: Fri, 5 Jan 2018 18:39:06 -0700
Subject: [PATCH] LU-10463 osd-zfs: use 1MB RPC size by default

Revert back to using 1MB RPC size for ZFS back-end storage, if it
is not otherwise specified, and as long as the ZFS recordsize is
1MB or smaller. Continue to use the ZFS recordsize if it is larger.

For ldiskfs, continue to use 4MB RPC size, unless the bigalloc
feature is enabled and has a larger chunksize.

Testing has shown that while 4MB RPC size is good for ldiskfs, it
does not improve ZFS performance, and increases IO variability in
some cases.

Signed-off-by: Andreas Dilger
Change-Id: I4b306843667bfd960ad07ecc3886a696fd3ebbe5
Reviewed-on: https://review.whamcloud.com/30757
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Alex Zhuravlev
Reviewed-by: Jian Yu
Reviewed-by: Oleg Drokin
---
 lustre/include/dt_object.h       |  3 ++-
 lustre/ofd/ofd_dev.c             |  6 +-----
 lustre/osd-ldiskfs/osd_handler.c | 10 ++++++++++
 lustre/osd-zfs/osd_handler.c     |  8 ++++++++
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h
index a10f4a9..2bf129e 100644
--- a/lustre/include/dt_object.h
+++ b/lustre/include/dt_object.h
@@ -84,7 +84,8 @@ struct dt_device_param {
 	unsigned ddp_max_extent_blks;
 	/* per-extent insertion overhead to be used by client for grant
 	 * calculation */
-	unsigned ddp_extent_tax;
+	unsigned int ddp_extent_tax;
+	unsigned int ddp_brw_size; /* optimal RPC size */
 };
 
 /**
diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c
index 73df3d4..f7eb89c 100644
--- a/lustre/ofd/ofd_dev.c
+++ b/lustre/ofd/ofd_dev.c
@@ -2952,11 +2952,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
 
 	tgd->tgd_reserved_pcnt = 0;
 
-	if (DT_DEF_BRW_SIZE < (1U << tgd->tgd_blockbits))
-		m->ofd_brw_size = 1U << tgd->tgd_blockbits;
-	else
-		m->ofd_brw_size = DT_DEF_BRW_SIZE;
-
+	m->ofd_brw_size = m->ofd_lut.lut_dt_conf.ddp_brw_size;
 	m->ofd_cksum_types_supported = cksum_types_supported_server();
 	m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT;
 	if (tgd->tgd_osfs.os_bsize * tgd->tgd_osfs.os_blocks <
diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c
index 8e0ef0c..14a48f9 100644
--- a/lustre/osd-ldiskfs/osd_handler.c
+++ b/lustre/osd-ldiskfs/osd_handler.c
@@ -2160,6 +2160,16 @@ static void osd_conf_get(const struct lu_env *env,
 	else
 #endif
 		param->ddp_max_ea_size = sb->s_blocksize - ea_overhead;
+
+	/* Preferred RPC size for efficient disk IO. 4MB shows good
+	 * all-around performance for ldiskfs, but use bigalloc chunk size
+	 * by default if larger. */
+#if defined(LDISKFS_CLUSTER_SIZE)
+	if (LDISKFS_CLUSTER_SIZE(sb) > DT_DEF_BRW_SIZE)
+		param->ddp_brw_size = LDISKFS_CLUSTER_SIZE(sb);
+	else
+#endif
+		param->ddp_brw_size = DT_DEF_BRW_SIZE;
 }
 
 /*
diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c
index 724c3e9..2575460 100644
--- a/lustre/osd-zfs/osd_handler.c
+++ b/lustre/osd-zfs/osd_handler.c
@@ -608,6 +608,14 @@ static void osd_conf_get(const struct lu_env *env,
 	param->ddp_max_extent_blks =
 		(1 << (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT));
 	param->ddp_extent_tax = osd_blk_insert_cost(osd);
+
+	/* Preferred RPC size for efficient disk IO. 1MB shows good
+	 * all-around performance for ZFS, but use blocksize (recordsize)
+	 * by default if larger to avoid read-modify-write. */
+	if (osd->od_max_blksz > ONE_MB_BRW_SIZE)
+		param->ddp_brw_size = osd->od_max_blksz;
+	else
+		param->ddp_brw_size = ONE_MB_BRW_SIZE;
 }
 
 /*
-- 
1.8.3.1