From: Brian Behlendorf
Date: Fri, 12 May 2017 15:05:13 +0000 (-0700)
Subject: LU-10460 osd-zfs: Add tunables to disable sync
X-Git-Tag: 2.10.58~61
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;ds=sidebyside;h=00c4bd85b372f68c3e75fdab9658c8f0074113be;p=fs%2Flustre-release.git

LU-10460 osd-zfs: Add tunables to disable sync

This patch allows replacing the call to txg_wait_synced(), which blocks waiting for a full pool sync, with a smaller tunable delay. This delay is intended to stand in for the time it would have taken to synchronously write the dirty data to the intent log. This allows testing ZFS behavior as if there were a low-latency ZIL device enabled to handle sync IO operations. Setting the delay to zero disables sync operations on the server completely. However, be aware that no data is guaranteed to be written to disk if the tunables are enabled, and this patch is solely for performance analysis. By default the tunables are set to -1, which leaves the system using the normal sync behavior.

Two new tunables are introduced to control the delay: the osd_object_sync_delay_us and osd_txg_sync_delay_us module options. These values default to -1, which preserves the safe full pool sync behavior. Setting these values to zero or larger will replace the pool sync with a delay of N microseconds.

The initial test results obtained by running sanityN test 16 (fsx) are encouraging. If the zil_commit() time can be kept to less than 10ms we should see a significant performance improvement. These tests were run in a pristine CentOS 6.4 VM and the results are averaged over four runs.

osd_txg_sync_delay_us         -1    -1    -1    -1    -1
osd_object_sync_delay_us      -1     0  1000 10000 100000
--------------------------------------------------------
SanityN test 16 (secs)      24.3   7.3   7.6  10.1  34.4

Change-Id: Iff9b66888edc79a5e1585fa3ce8377be068748f2
Signed-off-by: Brian Behlendorf
Signed-off-by: Darby Vicker
Reviewed-on: https://review.whamcloud.com/7761
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Giuseppe Di Natale
---
diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 2575460..74db348 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -66,6 +66,8 @@ struct lu_context_key osd_key; +static int osd_txg_sync_delay_us = -1; + /* Slab for OSD object allocation */ struct kmem_cache *osd_object_kmem; @@ -315,8 +317,12 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, osd_unlinked_list_emptify(env, osd, &unlinked, true); - if (sync) - txg_wait_synced(dmu_objset_pool(osd->od_os), txg); + if (sync) { + if (osd_txg_sync_delay_us < 0) + txg_wait_synced(dmu_objset_pool(osd->od_os), txg); + else + udelay(osd_txg_sync_delay_us); + } RETURN(rc); } @@ -1550,6 +1556,10 @@ extern unsigned int osd_oi_count; module_param(osd_oi_count, int, 0444); MODULE_PARM_DESC(osd_oi_count, "Number of Object Index containers to be created, it's only valid for new filesystem."); +module_param(osd_txg_sync_delay_us, int, 0644); +MODULE_PARM_DESC(osd_txg_sync_delay_us, + "When zero or larger delay N usec instead of doing TXG sync"); + MODULE_AUTHOR("OpenSFS, Inc. 
"); MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_ZFS_NAME")"); MODULE_VERSION(LUSTRE_VERSION_STRING); diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index 508cab7..00b3f4c 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -62,6 +62,7 @@ #include char *osd_obj_tag = "osd_object"; +static int osd_object_sync_delay_us = -1; static struct dt_object_operations osd_obj_ops; static struct lu_object_operations osd_lu_obj_ops; @@ -1928,8 +1929,12 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt, * support ZIL. If the object tracked the txg that it was last * modified in, it could pass that txg here instead of "0". Maybe * the changes are already committed, so no wait is needed at all? */ - if (!osd->od_dt_dev.dd_rdonly) - txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL); + if (!osd->od_dt_dev.dd_rdonly) { + if (osd_object_sync_delay_us < 0) + txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL); + else + udelay(osd_object_sync_delay_us); + } RETURN(0); } @@ -1989,3 +1994,7 @@ static struct dt_object_operations osd_obj_otable_it_ops = { .do_attr_get = osd_otable_it_attr_get, .do_index_try = osd_index_try, }; + +module_param(osd_object_sync_delay_us, int, 0644); +MODULE_PARM_DESC(osd_object_sync_delay_us, + "If zero or larger delay N usec instead of doing object sync");