From 21364e040ca2c4d9ad3025a51ff2cc5afca2132f Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Wed, 30 Nov 2022 09:29:47 -0500 Subject: [PATCH] EX-6468 pcc: add threshold to determine direct I/O during attach This patch adds a tunable threshold parameter that determines whether data is copied with direct I/O or buffered I/O during attach: llite.*.pcc_dio_attach_threshold The default value is the same as the default direct I/O size: 32MiB. The parameter "pcc_dio_attach_size_mb" is deprecated; use "pcc_dio_attach_iosize_mb" instead. Change-Id: I393d6a06523303e749192ba9978449c3d75886ae Signed-off-by: Qian Yingjin Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49286 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Feng Lei Reviewed-by: Andreas Dilger --- lustre/llite/lproc_llite.c | 61 ++++++++++++++++++++++++++++++++++++++----- lustre/llite/pcc.c | 34 ++++++++++++------------ lustre/llite/pcc.h | 8 +++++- lustre/tests/sanity-pcc.sh | 64 +++++++++++++++++++++++++++++----------------- 4 files changed, 121 insertions(+), 46 deletions(-) diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 82421dd..0c4e0a6 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -619,19 +619,19 @@ static ssize_t pcc_async_threshold_store(struct kobject *kobj, } LUSTRE_RW_ATTR(pcc_async_threshold); -static ssize_t pcc_dio_attach_size_mb_show(struct kobject *kobj, - struct attribute *attr, - char *buffer) +static ssize_t pcc_dio_attach_iosize_mb_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) { struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, ll_kset.kobj); struct pcc_super *super = &sbi->ll_pcc_super; return sprintf(buffer, "%u\n", - super->pccs_dio_attach_size_bytes / (1024 * 1024)); + super->pccs_dio_attach_iosize / (1024 * 1024)); } -static ssize_t pcc_dio_attach_size_mb_store(struct kobject *kobj, +static ssize_t pcc_dio_attach_iosize_mb_store(struct kobject *kobj, struct attribute *attr, const 
char *buffer, size_t count) { @@ -656,12 +656,59 @@ static ssize_t pcc_dio_attach_size_mb_store(struct kobject *kobj, bytes = PCC_DIO_ATTACH_MAXIOSIZE; } - super->pccs_dio_attach_size_bytes = bytes; + super->pccs_dio_attach_iosize = bytes; return count; } +LUSTRE_RW_ATTR(pcc_dio_attach_iosize_mb); + +static ssize_t pcc_dio_attach_size_mb_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + CWARN("llite.*.pcc_dio_attach_size_mb is deprecated, use pcc_dio_attach_iosize_mb\n"); + return pcc_dio_attach_iosize_mb_show(kobj, attr, buffer); +} + +static ssize_t pcc_dio_attach_size_mb_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + CWARN("llite.*.pcc_dio_attach_size_mb is deprecated, use pcc_dio_attach_iosize_mb\n"); + return pcc_dio_attach_iosize_mb_store(kobj, attr, buffer, count); +} LUSTRE_RW_ATTR(pcc_dio_attach_size_mb); +static ssize_t pcc_dio_attach_threshold_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, + ll_kset.kobj); + struct pcc_super *super = &sbi->ll_pcc_super; + + return sprintf(buffer, "%llu\n", super->pccs_dio_attach_threshold); +} + +static ssize_t pcc_dio_attach_threshold_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, + ll_kset.kobj); + struct pcc_super *super = &sbi->ll_pcc_super; + u64 size; + int rc; + + rc = sysfs_memparse(buffer, count, &size, "B"); + if (rc) + return rc; + + super->pccs_dio_attach_threshold = size; + return count; +} +LUSTRE_RW_ATTR(pcc_dio_attach_threshold); + static ssize_t pcc_max_attach_thread_num_show(struct kobject *kobj, struct attribute *attr, char *buffer) @@ -1982,6 +2029,8 @@ static struct attribute *llite_attrs[] = { &lustre_attr_inode_cache.attr, &lustre_attr_pcc_async_threshold.attr, &lustre_attr_pcc_dio_attach_size_mb.attr, + 
&lustre_attr_pcc_dio_attach_iosize_mb.attr, + &lustre_attr_pcc_dio_attach_threshold.attr, &lustre_attr_pcc_max_attach_thread_num.attr, &lustre_attr_pcc_mode.attr, &lustre_attr_pcc_async_affinity.attr, diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index a9fa394..33f3117 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -130,7 +130,8 @@ int pcc_super_init(struct pcc_super *super) INIT_LIST_HEAD(&super->pccs_datasets); super->pccs_generation = 1; super->pccs_async_threshold = PCC_DEFAULT_ASYNC_THRESHOLD; - super->pccs_dio_attach_size_bytes = PCC_DEFAULT_DIO_ATTACH_IOSIZE; + super->pccs_dio_attach_iosize = PCC_DEFAULT_DIO_ATTACH_IOSIZE; + super->pccs_dio_attach_threshold = PCC_DEFAULT_DIO_ATTACH_THRESHOLD; super->pccs_mode = S_IRUSR; atomic_set(&super->pccs_attaches_queued, 0); super->pccs_maximum_queued_attaches = PCCS_DEFAULT_ATTACH_QUEUE_DEPTH; @@ -3814,8 +3815,8 @@ out_free: RETURN(rc); } -static ssize_t pcc_copy_data_dio(struct pcc_super *super, - struct file *lu_file, char *pcc_filepath) +static ssize_t pcc_copy_data_dio(struct pcc_super *super, struct file *lu_file, + char *pcc_filepath, __u64 filesize) { char *envp[] = { [0] = "HOME=/", @@ -3835,7 +3836,7 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super, argv[9] = "-d", /* use DIO */ argv[10] = NULL, }; - __u32 iosize = super->pccs_dio_attach_size_bytes; + __u32 iosize = super->pccs_dio_attach_iosize; /* iosize is __u32, 2^32 is 4 billion, which is 10 digits, so + 1 for nul */ #define IOSIZE_MAXLEN 11 char iosize_str[IOSIZE_MAXLEN]; @@ -3854,6 +3855,8 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super, } argv[2] = fidstring; + if (iosize > filesize) + iosize = roundup(filesize, 1024 * 1024); /* this should be impossible, but let's check for it anyway */ if (snprintf(iosize_str, IOSIZE_MAXLEN, "%u", iosize) >= IOSIZE_MAXLEN) { @@ -3873,7 +3876,7 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super, static ssize_t pcc_copy_data(struct pcc_super *super, struct file 
*lu_file, struct file *pcc_file, char *pcc_pathname, - bool atomic_open_locked) + __u64 size, bool atomic_open_locked) { if (pcc_pathname) { #ifndef HAVE_INODE_RWSEM @@ -3885,13 +3888,13 @@ static ssize_t pcc_copy_data(struct pcc_super *super, struct file *lu_file, LASSERT(inode_is_locked(dir)); inode_unlock(dir); } - rc = pcc_copy_data_dio(super, lu_file, pcc_pathname); + rc = pcc_copy_data_dio(super, lu_file, pcc_pathname, size); if (atomic_open_locked) inode_lock(dir); return rc; #else - return pcc_copy_data_dio(super, lu_file, pcc_pathname); + return pcc_copy_data_dio(super, lu_file, pcc_pathname, size); #endif } @@ -3912,22 +3915,21 @@ static int pcc_attach_data_archive(struct file *lu_file, struct path pcc_path; ktime_t kstart = ktime_get(); ssize_t ret; - bool use_dio = false; int flags = O_WRONLY | O_LARGEFILE; int rc; - __u64 filesize = max_t(__u64, ll_i2info(lu_inode)->lli_lazysize, i_size_read(lu_inode)); - __u64 iosize_bytes = super->pccs_dio_attach_size_bytes; + __u64 filesize = max_t(__u64, ll_i2info(lu_inode)->lli_lazysize, + i_size_read(lu_inode)); ENTRY; - /* use DIO for files which are at least iosize bytes in size, otherwise - * use buffered I/O - this is a good default and avoids having a - * separate threshold tunable for DIO attach size + /* + * Use DIO for files which are at least @pccs_dio_attach_threshold + * bytes in size, otherwise use buffered I/O. 
*/ - if (filesize >= iosize_bytes && iosize_bytes !=0) { + if (filesize >= super->pccs_dio_attach_threshold && + super->pccs_dio_attach_iosize != 0) { int pathlen; - use_dio = true; OBD_ALLOC(pcc_pathname, PATH_MAX); if (!pcc_pathname) GOTO(out, rc = -ENOMEM); @@ -3995,7 +3997,7 @@ static int pcc_attach_data_archive(struct file *lu_file, direct = true; } - ret = pcc_copy_data(super, lu_file, pcc_file, pcc_pathname, + ret = pcc_copy_data(super, lu_file, pcc_file, pcc_pathname, filesize, atomic_open_locked); if (direct) lu_file->f_flags |= O_DIRECT; diff --git a/lustre/llite/pcc.h b/lustre/llite/pcc.h index 9d9d462..d23c73f 100644 --- a/lustre/llite/pcc.h +++ b/lustre/llite/pcc.h @@ -169,6 +169,11 @@ struct pcc_dataset { #define PCC_DIO_ATTACH_MAXIOSIZE (1 << 28) /* 256 MiB */ /* 32 MiB gives good performance without using too much memory */ #define PCC_DEFAULT_DIO_ATTACH_IOSIZE (32 * (1 << 20)) /* 32 MiB */ +/* + * The threshold to determine doing direct I/O or buffered I/O for data + * copying during attach. + */ +#define PCC_DEFAULT_DIO_ATTACH_THRESHOLD (32 * (1 << 20)) /* 32 MiB */ /* after this many attaches are queued up, fall back to sync attach. each * attach creates a kthread, so we don't allow too many at once, but sync * attach is very bad for applications, so we try to be generous. @@ -189,7 +194,8 @@ struct pcc_super { __u64 pccs_generation; /* Size threshold for asynchrous PCC-RO attach in background. 
*/ __u64 pccs_async_threshold; - __u32 pccs_dio_attach_size_bytes; + __u32 pccs_dio_attach_iosize; + __u64 pccs_dio_attach_threshold; bool pccs_async_affinity; umode_t pccs_mode; atomic_t pccs_attaches_queued; diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 3a53c8a..2eb48a2 100644 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -3860,18 +3860,21 @@ test_49b() { $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro || skip "Server does not support PCC-RO" - local dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1) - stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$dio_attach_size" EXIT + local dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1) + stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize" EXIT + + local dio_threshold=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_threshold | head -n 1) + stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$dio_threshold" EXIT echo "Testing that invalid inputs should be clamped to [min, max] bound" - do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=300 || + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=300 || error "setting iosize to > 256 MiB should be clamped" - dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1) + dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1) # allow future limit increase, but at least check limit was changed - (( $dio_attach_size >= 256 )) || + (( $dio_attach_iosize >= 256 )) || error "set iosize > 256 MiB should not be clamped < 256 MiB" - do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=0 || + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=0 || 
error "should be able to set attach size to 0" echo "Normal testing from here on - no errors expected." @@ -3880,8 +3883,11 @@ test_49b() { setup_pcc_mapping $SINGLEAGT \ "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0\ pccrw=1\ pccro=1" - do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=1 || - error "failed to set dio_attach_size_mb" + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=1 || + error "failed to set dio_attach_iosize_mb" + + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=1048576 || + error "failed to set dio_attach_threshold" # DIO attach size is 1 MiB, so this will DIO attach mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed" @@ -3972,8 +3978,8 @@ test_49b() { check_lpcc_state $file "none" # Test attach with non-default size - do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=16 || - error "failed to set dio_attach_size" + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=16 || + error "failed to set dio_attach_iosize" rm -f $file || error "failed to remove $file" @@ -4012,11 +4018,17 @@ test_49c() { setup_pcc_mapping $SINGLEAGT \ "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0\ pccrw=1\ pccro=1" - local dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1) - stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$dio_attach_size" EXIT + local dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1) + stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize" EXIT + + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=1 || + error "failed to set dio_attach_iosize_mb" - do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=1 || - error "failed to set dio_attach_size_mb" + local dio_threshold=$(do_facet $SINGLEAGT $LCTL get_param -n 
llite.*.pcc_dio_attach_threshold | head -n 1) + stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$dio_threshold" EXIT + + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=1048576 || + error "failed to set dio_attach_threshold" # DIO attach size is 1 MiB, so this will DIO attach mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed" @@ -4117,21 +4129,27 @@ run_test 49c "Test multiple attach in parallel" test_49d() { local file=$DIR/$tdir/$tfile - local io_size_mb=16 - local io_size=$((1024 * 1024 * io_size_mb)) + local iosize_mb=16 + local iosize=$((1024 * 1024 * iosize_mb)) $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro || skip "Server does not support PCC-RO" mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed" - local dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1) - stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$dio_attach_size" EXIT + local dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1) + stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize" EXIT + + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$iosize_mb || + error "failed to set dio_attach_iosize_mb" + + local dio_threshold=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_threshold | head -n 1) + stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$dio_threshold" EXIT - do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$io_size_mb || - error "failed to set dio_attach_size_mb" + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$iosize || + error "failed to set dio_attach_threshold" - # attach size is 16M and we stop testing at just over 2*io_size + # attach size is 16M and we stop testing at just over 2*iosize # This is necessary because reading from urandom 
is extremely slow dd if=/dev/urandom bs=1M of=$file.src count=48 || error "dd to create source file failed" @@ -4140,8 +4158,8 @@ test_49d() { setup_pcc_mapping $SINGLEAGT \ "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0\ pccrw=1\ pccro=1" - for size in $((io_size * 2 + 2)) $((io_size * 2)) $((io_size + 2))\ - $((io_size)) $((1024 * 1024 + 1)) $((1024 * 1024)) 5000 \ + for size in $((iosize * 2 + 2)) $((iosize * 2)) $((iosize + 2))\ + $((iosize)) $((1024 * 1024 + 1)) $((1024 * 1024)) 5000 \ 4096 2048 1; do # Shrink the source to the correct size so we can just read it # at large block sizes with dd to create our desired file size -- 1.8.3.1