Whamcloud - gitweb
LU-10463 osd-zfs: use 1MB RPC size by default 69/30969/2
author: Andreas Dilger <andreas.dilger@intel.com>
Sat, 6 Jan 2018 01:39:06 +0000 (18:39 -0700)
committer: John L. Hammond <john.hammond@intel.com>
Fri, 9 Feb 2018 18:12:57 +0000 (18:12 +0000)
Revert back to using 1MB RPC size for ZFS back-end storage, if it
is not otherwise specified, and as long as the ZFS recordsize is
1MB or smaller.  Continue to use the ZFS recordsize if it is larger.

For ldiskfs, continue to use 4MB RPC size, unless the bigalloc
feature is enabled and has a larger chunksize.

Testing has shown that while 4MB RPC size is good for ldiskfs, it
does not improve ZFS performance, and increases IO variability in
some cases.

Lustre-change: https://review.whamcloud.com/30757
Lustre-commit: af34a876d2ebde2b4717c920683c7fc8b5eae1cf

Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Change-Id: I4b306843667bfd960ad07ecc3886a696fd3ebbe5
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Signed-off-by: Minh Diep <minh.diep@intel.com>
Reviewed-on: https://review.whamcloud.com/30969
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
lustre/include/dt_object.h
lustre/ofd/ofd_dev.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-zfs/osd_handler.c

index 436139f..4f8a0ce 100644 (file)
@@ -84,7 +84,8 @@ struct dt_device_param {
        unsigned           ddp_max_extent_blks;
        /* per-extent insertion overhead to be used by client for grant
         * calculation */
-       unsigned           ddp_extent_tax;
+       unsigned int       ddp_extent_tax;
+       unsigned int       ddp_brw_size;        /* optimal RPC size */
 };
 
 /**
index 7c7d234..fde41b7 100644 (file)
@@ -3031,11 +3031,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
        }
        tgd->tgd_blockbits = fls(osfs->os_bsize) - 1;
 
-       if (DT_DEF_BRW_SIZE < (1U << tgd->tgd_blockbits))
-               m->ofd_brw_size = 1U << tgd->tgd_blockbits;
-       else
-               m->ofd_brw_size = DT_DEF_BRW_SIZE;
-
+       m->ofd_brw_size = m->ofd_lut.lut_dt_conf.ddp_brw_size;
        m->ofd_cksum_types_supported = cksum_types_supported_server();
        m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT;
        if (osfs->os_bsize * osfs->os_blocks < OFD_PRECREATE_SMALL_FS)
index 6e310dc..cf8e476 100644 (file)
@@ -2120,6 +2120,16 @@ static void osd_conf_get(const struct lu_env *env,
        else
 #endif
                param->ddp_max_ea_size = sb->s_blocksize - ea_overhead;
+
+       /* Preferred RPC size for efficient disk IO.  4MB shows good
+        * all-around performance for ldiskfs, but use bigalloc chunk size
+        * by default if larger. */
+#if defined(LDISKFS_CLUSTER_SIZE)
+       if (LDISKFS_CLUSTER_SIZE(sb) > DT_DEF_BRW_SIZE)
+               param->ddp_brw_size = LDISKFS_CLUSTER_SIZE(sb);
+       else
+#endif
+               param->ddp_brw_size = DT_DEF_BRW_SIZE;
 }
 
 /*
index 67da840..44b540d 100644 (file)
@@ -612,6 +612,14 @@ static void osd_conf_get(const struct lu_env *env,
        param->ddp_max_extent_blks =
                (1 << (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT));
        param->ddp_extent_tax = osd_blk_insert_cost(osd);
+
+       /* Preferred RPC size for efficient disk IO.  1MB shows good
+        * all-around performance for ZFS, but use blocksize (recordsize)
+        * by default if larger to avoid read-modify-write. */
+       if (osd->od_max_blksz > ONE_MB_BRW_SIZE)
+               param->ddp_brw_size = osd->od_max_blksz;
+       else
+               param->ddp_brw_size = ONE_MB_BRW_SIZE;
 }
 
 /*