From f119ec3196eb3e7773eeb4dcb3d825d7f8725a9c Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 5 Jan 2018 18:39:06 -0700 Subject: [PATCH] LU-10463 osd-zfs: use 1MB RPC size by default Revert to using a 1MB RPC size for ZFS back-end storage, if it is not otherwise specified, and as long as the ZFS recordsize is 1MB or smaller. Continue to use the ZFS recordsize if it is larger. For ldiskfs, continue to use a 4MB RPC size, unless the bigalloc feature is enabled and has a larger chunksize. Testing has shown that while a 4MB RPC size is good for ldiskfs, it does not improve ZFS performance, and increases IO variability in some cases. Lustre-change: https://review.whamcloud.com/30757 Lustre-commit: af34a876d2ebde2b4717c920683c7fc8b5eae1cf Signed-off-by: Andreas Dilger Change-Id: I4b306843667bfd960ad07ecc3886a696fd3ebbe5 Reviewed-by: Alex Zhuravlev Reviewed-by: Jian Yu Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/30969 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: John L. 
Hammond --- lustre/include/dt_object.h | 3 ++- lustre/ofd/ofd_dev.c | 6 +----- lustre/osd-ldiskfs/osd_handler.c | 10 ++++++++++ lustre/osd-zfs/osd_handler.c | 8 ++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 436139f..4f8a0ce 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -84,7 +84,8 @@ struct dt_device_param { unsigned ddp_max_extent_blks; /* per-extent insertion overhead to be used by client for grant * calculation */ - unsigned ddp_extent_tax; + unsigned int ddp_extent_tax; + unsigned int ddp_brw_size; /* optimal RPC size */ }; /** diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 7c7d234..fde41b7 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -3031,11 +3031,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, } tgd->tgd_blockbits = fls(osfs->os_bsize) - 1; - if (DT_DEF_BRW_SIZE < (1U << tgd->tgd_blockbits)) - m->ofd_brw_size = 1U << tgd->tgd_blockbits; - else - m->ofd_brw_size = DT_DEF_BRW_SIZE; - + m->ofd_brw_size = m->ofd_lut.lut_dt_conf.ddp_brw_size; m->ofd_cksum_types_supported = cksum_types_supported_server(); m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT; if (osfs->os_bsize * osfs->os_blocks < OFD_PRECREATE_SMALL_FS) diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 6e310dc..cf8e476 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -2120,6 +2120,16 @@ static void osd_conf_get(const struct lu_env *env, else #endif param->ddp_max_ea_size = sb->s_blocksize - ea_overhead; + + /* Preferred RPC size for efficient disk IO. 4MB shows good + * all-around performance for ldiskfs, but use bigalloc chunk size + * by default if larger. 
*/ +#if defined(LDISKFS_CLUSTER_SIZE) + if (LDISKFS_CLUSTER_SIZE(sb) > DT_DEF_BRW_SIZE) + param->ddp_brw_size = LDISKFS_CLUSTER_SIZE(sb); + else +#endif + param->ddp_brw_size = DT_DEF_BRW_SIZE; } /* diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 67da840..44b540d 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -612,6 +612,14 @@ static void osd_conf_get(const struct lu_env *env, param->ddp_max_extent_blks = (1 << (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)); param->ddp_extent_tax = osd_blk_insert_cost(osd); + + /* Preferred RPC size for efficient disk IO. 1MB shows good + * all-around performance for ZFS, but use blocksize (recordsize) + * by default if larger to avoid read-modify-write. */ + if (osd->od_max_blksz > ONE_MB_BRW_SIZE) + param->ddp_brw_size = osd->od_max_blksz; + else + param->ddp_brw_size = ONE_MB_BRW_SIZE; } /* -- 1.8.3.1