Whamcloud - gitweb
EX-6468 pcc: add threshold to determine direct I/O during attach
author Qian Yingjin <qian@ddn.com>
Wed, 30 Nov 2022 14:29:47 +0000 (09:29 -0500)
committer Andreas Dilger <adilger@whamcloud.com>
Tue, 13 Dec 2022 18:51:07 +0000 (18:51 +0000)
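This patch adds a tunable threshold parameter that determines whether
data copying during attach uses direct I/O or buffered I/O:
llite.*.pcc_dio_attach_threshold
Files of at least this size are copied with direct I/O; smaller files
use buffered I/O. The default value is the same as the default direct
I/O size: 32 MiB.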

The parameter "pcc_dio_attach_size_mb" is now deprecated; use
"pcc_dio_attach_iosize_mb" instead.

Change-Id: I393d6a06523303e749192ba9978449c3d75886ae
Signed-off-by: Qian Yingjin <qian@ddn.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49286
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Feng Lei <flei@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/llite/lproc_llite.c
lustre/llite/pcc.c
lustre/llite/pcc.h
lustre/tests/sanity-pcc.sh

index 82421dd..0c4e0a6 100644 (file)
@@ -619,19 +619,19 @@ static ssize_t pcc_async_threshold_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(pcc_async_threshold);
 
-static ssize_t pcc_dio_attach_size_mb_show(struct kobject *kobj,
-                                          struct attribute *attr,
-                                          char *buffer)
+static ssize_t pcc_dio_attach_iosize_mb_show(struct kobject *kobj,
+                                            struct attribute *attr,
+                                            char *buffer)
 {
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
        struct pcc_super *super = &sbi->ll_pcc_super;
 
        return sprintf(buffer, "%u\n",
-                      super->pccs_dio_attach_size_bytes / (1024 * 1024));
+                      super->pccs_dio_attach_iosize / (1024 * 1024));
 }
 
-static ssize_t pcc_dio_attach_size_mb_store(struct kobject *kobj,
+static ssize_t pcc_dio_attach_iosize_mb_store(struct kobject *kobj,
                                              struct attribute *attr,
                                              const char *buffer, size_t count)
 {
@@ -656,12 +656,59 @@ static ssize_t pcc_dio_attach_size_mb_store(struct kobject *kobj,
                bytes = PCC_DIO_ATTACH_MAXIOSIZE;
        }
 
-       super->pccs_dio_attach_size_bytes = bytes;
+       super->pccs_dio_attach_iosize = bytes;
 
        return count;
 }
+LUSTRE_RW_ATTR(pcc_dio_attach_iosize_mb);
+
+static ssize_t pcc_dio_attach_size_mb_show(struct kobject *kobj,
+                                          struct attribute *attr,
+                                          char *buffer)
+{
+       CWARN("llite.*.pcc_dio_attach_size_mb is deprecated, use pcc_dio_attach_iosize_mb\n");
+       return pcc_dio_attach_iosize_mb_show(kobj, attr, buffer);
+}
+
+static ssize_t pcc_dio_attach_size_mb_store(struct kobject *kobj,
+                                           struct attribute *attr,
+                                           const char *buffer, size_t count)
+{
+       CWARN("llite.*.pcc_dio_attach_size_mb is deprecated, use pcc_dio_attach_iosize_mb\n");
+       return pcc_dio_attach_iosize_mb_store(kobj, attr, buffer, count);
+}
 LUSTRE_RW_ATTR(pcc_dio_attach_size_mb);
 
+static ssize_t pcc_dio_attach_threshold_show(struct kobject *kobj,
+                                            struct attribute *attr,
+                                            char *buffer)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       struct pcc_super *super = &sbi->ll_pcc_super;
+
+       return sprintf(buffer, "%llu\n", super->pccs_dio_attach_threshold);
+}
+
+static ssize_t pcc_dio_attach_threshold_store(struct kobject *kobj,
+                                             struct attribute *attr,
+                                             const char *buffer, size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       struct pcc_super *super = &sbi->ll_pcc_super;
+       u64 size;
+       int rc;
+
+       rc = sysfs_memparse(buffer, count, &size, "B");
+       if (rc)
+               return rc;
+
+       super->pccs_dio_attach_threshold = size;
+       return count;
+}
+LUSTRE_RW_ATTR(pcc_dio_attach_threshold);
+
 static ssize_t
 pcc_max_attach_thread_num_show(struct kobject *kobj, struct attribute *attr,
                               char *buffer)
@@ -1982,6 +2029,8 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_inode_cache.attr,
        &lustre_attr_pcc_async_threshold.attr,
        &lustre_attr_pcc_dio_attach_size_mb.attr,
+       &lustre_attr_pcc_dio_attach_iosize_mb.attr,
+       &lustre_attr_pcc_dio_attach_threshold.attr,
        &lustre_attr_pcc_max_attach_thread_num.attr,
        &lustre_attr_pcc_mode.attr,
        &lustre_attr_pcc_async_affinity.attr,
index a9fa394..33f3117 100644 (file)
@@ -130,7 +130,8 @@ int pcc_super_init(struct pcc_super *super)
        INIT_LIST_HEAD(&super->pccs_datasets);
        super->pccs_generation = 1;
        super->pccs_async_threshold = PCC_DEFAULT_ASYNC_THRESHOLD;
-       super->pccs_dio_attach_size_bytes = PCC_DEFAULT_DIO_ATTACH_IOSIZE;
+       super->pccs_dio_attach_iosize = PCC_DEFAULT_DIO_ATTACH_IOSIZE;
+       super->pccs_dio_attach_threshold = PCC_DEFAULT_DIO_ATTACH_THRESHOLD;
        super->pccs_mode = S_IRUSR;
        atomic_set(&super->pccs_attaches_queued, 0);
        super->pccs_maximum_queued_attaches = PCCS_DEFAULT_ATTACH_QUEUE_DEPTH;
@@ -3814,8 +3815,8 @@ out_free:
        RETURN(rc);
 }
 
-static ssize_t pcc_copy_data_dio(struct pcc_super *super,
-                                struct file *lu_file, char *pcc_filepath)
+static ssize_t pcc_copy_data_dio(struct pcc_super *super, struct file *lu_file,
+                                char *pcc_filepath, __u64 filesize)
 {
        char *envp[] = {
                [0] = "HOME=/",
@@ -3835,7 +3836,7 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super,
                argv[9] = "-d", /* use DIO */
                argv[10] = NULL,
        };
-       __u32 iosize = super->pccs_dio_attach_size_bytes;
+       __u32 iosize = super->pccs_dio_attach_iosize;
 /* iosize is __u32, 2^32 is 4 billion, which is 10 digits, so + 1 for nul */
 #define IOSIZE_MAXLEN 11
        char iosize_str[IOSIZE_MAXLEN];
@@ -3854,6 +3855,8 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super,
        }
        argv[2] = fidstring;
 
+       if (iosize > filesize)
+               iosize = roundup(filesize, 1024 * 1024);
        /* this should be impossible, but let's check for it anyway */
        if (snprintf(iosize_str, IOSIZE_MAXLEN, "%u", iosize) >=
            IOSIZE_MAXLEN) {
@@ -3873,7 +3876,7 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super,
 
 static ssize_t pcc_copy_data(struct pcc_super *super, struct file *lu_file,
                             struct file *pcc_file, char *pcc_pathname,
-                            bool atomic_open_locked)
+                            __u64 size, bool atomic_open_locked)
 {
        if (pcc_pathname) {
 #ifndef HAVE_INODE_RWSEM
@@ -3885,13 +3888,13 @@ static ssize_t pcc_copy_data(struct pcc_super *super, struct file *lu_file,
                        LASSERT(inode_is_locked(dir));
                        inode_unlock(dir);
                }
-               rc = pcc_copy_data_dio(super, lu_file, pcc_pathname);
+               rc = pcc_copy_data_dio(super, lu_file, pcc_pathname, size);
                if (atomic_open_locked)
                        inode_lock(dir);
 
                return rc;
 #else
-               return pcc_copy_data_dio(super, lu_file, pcc_pathname);
+               return pcc_copy_data_dio(super, lu_file, pcc_pathname, size);
 #endif
        }
 
@@ -3912,22 +3915,21 @@ static int pcc_attach_data_archive(struct file *lu_file,
        struct path pcc_path;
        ktime_t kstart = ktime_get();
        ssize_t ret;
-       bool use_dio = false;
        int flags = O_WRONLY | O_LARGEFILE;
        int rc;
-       __u64 filesize = max_t(__u64, ll_i2info(lu_inode)->lli_lazysize, i_size_read(lu_inode));
-       __u64 iosize_bytes = super->pccs_dio_attach_size_bytes;
+       __u64 filesize = max_t(__u64, ll_i2info(lu_inode)->lli_lazysize,
+                              i_size_read(lu_inode));
 
        ENTRY;
 
-       /* use DIO for files which are at least iosize bytes in size, otherwise
-        * use buffered I/O - this is a good default and avoids having a
-        * separate threshold tunable for DIO attach size
+       /*
+        * Use DIO for files which are at least @pccs_dio_attach_threshold
+        * bytes in size, otherwise use buffered I/O.
         */
-       if (filesize >= iosize_bytes && iosize_bytes !=0) {
+       if (filesize >= super->pccs_dio_attach_threshold &&
+           super->pccs_dio_attach_iosize != 0) {
                int pathlen;
 
-               use_dio = true;
                OBD_ALLOC(pcc_pathname, PATH_MAX);
                if (!pcc_pathname)
                        GOTO(out, rc = -ENOMEM);
@@ -3995,7 +3997,7 @@ static int pcc_attach_data_archive(struct file *lu_file,
                direct = true;
        }
 
-       ret = pcc_copy_data(super, lu_file, pcc_file, pcc_pathname,
+       ret = pcc_copy_data(super, lu_file, pcc_file, pcc_pathname, filesize,
                            atomic_open_locked);
        if (direct)
                lu_file->f_flags |= O_DIRECT;
index 9d9d462..d23c73f 100644 (file)
@@ -169,6 +169,11 @@ struct pcc_dataset {
 #define PCC_DIO_ATTACH_MAXIOSIZE (1 << 28) /* 256 MiB */
 /* 32 MiB gives good performance without using too much memory */
 #define PCC_DEFAULT_DIO_ATTACH_IOSIZE (32 * (1 << 20)) /* 32 MiB */
+/*
+ * The threshold to determine doing direct I/O or buffered I/O for data
+ * copying during attach.
+ */
+#define PCC_DEFAULT_DIO_ATTACH_THRESHOLD (32 * (1 << 20)) /* 32 MiB */
 /* after this many attaches are queued up, fall back to sync attach.  each
  * attach creates a kthread, so we don't allow too many at once, but sync
  * attach is very bad for applications, so we try to be generous.
@@ -189,7 +194,8 @@ struct pcc_super {
        __u64                    pccs_generation;
        /* Size threshold for asynchrous PCC-RO attach in background. */
        __u64                    pccs_async_threshold;
-       __u32                    pccs_dio_attach_size_bytes;
+       __u32                    pccs_dio_attach_iosize;
+       __u64                    pccs_dio_attach_threshold;
        bool                     pccs_async_affinity;
        umode_t                  pccs_mode;
        atomic_t                 pccs_attaches_queued;
index 3a53c8a..2eb48a2 100644 (file)
@@ -3860,18 +3860,21 @@ test_49b() {
        $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
                skip "Server does not support PCC-RO"
 
-       local dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1)
-       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$dio_attach_size" EXIT
+       local dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1)
+       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize" EXIT
+
+       local dio_threshold=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_threshold | head -n 1)
+       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$dio_threshold" EXIT
 
        echo "Testing that invalid inputs should be clamped to [min, max] bound"
-       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=300 ||
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=300 ||
                error "setting iosize to > 256 MiB should be clamped"
-       dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1)
+       dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1)
        # allow future limit increase, but at least check limit was changed
-       (( $dio_attach_size >= 256 )) ||
+       (( $dio_attach_iosize >= 256 )) ||
                error "set iosize > 256 MiB should not be clamped < 256 MiB"
 
-       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=0 ||
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=0 ||
                error "should be able to set attach size to 0"
 
        echo "Normal testing from here on - no errors expected."
@@ -3880,8 +3883,11 @@ test_49b() {
        setup_pcc_mapping $SINGLEAGT \
                "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0\ pccrw=1\ pccro=1"
 
-       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=1 ||
-               error "failed to set dio_attach_size_mb"
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=1 ||
+               error "failed to set dio_attach_iosize_mb"
+
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=1048576 ||
+               error "failed to set dio_attach_threshold"
 
        # DIO attach size is 1 MiB, so this will DIO attach
        mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
@@ -3972,8 +3978,8 @@ test_49b() {
        check_lpcc_state $file "none"
 
        # Test attach with non-default size
-       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=16 ||
-               error "failed to set dio_attach_size"
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=16 ||
+               error "failed to set dio_attach_iosize"
 
        rm -f $file || error "failed to remove $file"
 
@@ -4012,11 +4018,17 @@ test_49c() {
        setup_pcc_mapping $SINGLEAGT \
                "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0\ pccrw=1\ pccro=1"
 
-       local dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1)
-       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$dio_attach_size" EXIT
+       local dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1)
+       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize" EXIT
+
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=1 ||
+               error "failed to set dio_attach_iosize_mb"
 
-       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=1 ||
-               error "failed to set dio_attach_size_mb"
+       local dio_threshold=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_threshold | head -n 1)
+       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$dio_threshold" EXIT
+
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=1048576 ||
+               error "failed to set dio_attach_threshold"
 
        # DIO attach size is 1 MiB, so this will DIO attach
        mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
@@ -4117,21 +4129,27 @@ run_test 49c "Test multiple attach in parallel"
 
 test_49d() {
        local file=$DIR/$tdir/$tfile
-       local io_size_mb=16
-       local io_size=$((1024 * 1024 * io_size_mb))
+       local iosize_mb=16
+       local iosize=$((1024 * 1024 * iosize_mb))
 
        $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
                skip "Server does not support PCC-RO"
 
        mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 
-       local dio_attach_size=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_size_mb | head -n 1)
-       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$dio_attach_size" EXIT
+       local dio_attach_iosize=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1)
+       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize" EXIT
+
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$iosize_mb ||
+               error "failed to set dio_attach_iosize_mb"
+
+       local dio_threshold=$(do_facet $SINGLEAGT $LCTL get_param -n llite.*.pcc_dio_attach_threshold | head -n 1)
+       stack_trap "do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$dio_threshold" EXIT
 
-       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_size_mb=$io_size_mb ||
-               error "failed to set dio_attach_size_mb"
+       do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_dio_attach_threshold=$iosize ||
+               error "failed to set dio_attach_threshold"
 
-       # attach size is 16M and we stop testing at just over 2*io_size
+       # attach size is 16M and we stop testing at just over 2*iosize
        # This is necessary because reading from urandom is extremely slow
        dd if=/dev/urandom bs=1M of=$file.src count=48 ||
                error "dd to create source file failed"
@@ -4140,8 +4158,8 @@ test_49d() {
        setup_pcc_mapping $SINGLEAGT \
                "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0\ pccrw=1\ pccro=1"
 
-       for size in $((io_size * 2 + 2))  $((io_size * 2)) $((io_size + 2))\
-               $((io_size)) $((1024 * 1024 + 1)) $((1024 * 1024)) 5000 \
+       for size in $((iosize * 2 + 2))  $((iosize * 2)) $((iosize + 2))\
+               $((iosize)) $((1024 * 1024 + 1)) $((1024 * 1024)) 5000 \
                4096 2048 1; do
                # Shrink the source to the correct size so we can just read it
                # at large block sizes with dd to create our desired file size