Whamcloud - gitweb
LU-12394 llite: Fix extents_stats 75/35075/8
author Patrick Farrell <pfarrell@whamcloud.com>
Tue, 11 Jun 2019 18:54:20 +0000 (14:54 -0400)
committer Oleg Drokin <green@whamcloud.com>
Wed, 21 Aug 2019 04:56:33 +0000 (04:56 +0000)
Patch 32517 from LU-8066 changed:
        (1 << LL_HIST_START << i)

To:

        BIT(LL_HIST_START << i)

But these are not equivalent because this changes the order
of operations.  The earlier one does the operations in this
order:
        (1 << LL_HIST_START) << i

The new one is this order:
        1 << (LL_HIST_START << i)

Which is quite different: it left-shifts LL_HIST_START
itself, but LL_HIST_START is a bit count (the shift
amount), not a value to be shifted.

The goal is really just to start with BIT(LL_HIST_START)
and left shift by one (going from 4K, to 8K, etc) each
time, so just use:
        BIT(LL_HIST_START + i)

The result of this was that all i/os over 8K were placed in
the 4K-8K stat bucket, because the loop exited early.

Also add mmap'ed reads & writes to extents_stats.

Add test for extents_stats.

Fixes: adb5aca3d673 ("LU-8066 llite: Move all remaining procfs entries
                     to debugfs")

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: Iab4dc097234d411601a18d501075df45791d1138
Reviewed-on: https://review.whamcloud.com/35075
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/file.c
lustre/llite/llite_mmap.c
lustre/llite/lproc_llite.c
lustre/llite/vvp_io.c
lustre/tests/sanity.sh

index dad39ac..6760ae0 100644 (file)
@@ -1658,6 +1658,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct lu_env *env;
        struct vvp_io_args *args;
 {
        struct lu_env *env;
        struct vvp_io_args *args;
+       struct file *file = iocb->ki_filp;
        ssize_t result;
        ssize_t rc2;
        __u16 refcheck;
        ssize_t result;
        ssize_t rc2;
        __u16 refcheck;
@@ -1681,7 +1682,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (cached)
                return result;
 
        if (cached)
                return result;
 
-       ll_ras_enter(iocb->ki_filp);
+       ll_ras_enter(file);
 
        result = ll_do_fast_read(iocb, to);
        if (result < 0 || iov_iter_count(to) == 0)
 
        result = ll_do_fast_read(iocb, to);
        if (result < 0 || iov_iter_count(to) == 0)
@@ -1695,7 +1696,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        args->u.normal.via_iter = to;
        args->u.normal.via_iocb = iocb;
 
        args->u.normal.via_iter = to;
        args->u.normal.via_iocb = iocb;
 
-       rc2 = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
+       rc2 = ll_file_io_generic(env, args, file, CIT_READ,
                                 &iocb->ki_pos, iov_iter_count(to));
        if (rc2 > 0)
                result += rc2;
                                 &iocb->ki_pos, iov_iter_count(to));
        if (rc2 > 0)
                result += rc2;
@@ -1704,6 +1705,11 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 
        cl_env_put(env, &refcheck);
 out:
 
        cl_env_put(env, &refcheck);
 out:
+       if (result > 0)
+               ll_rw_stats_tally(ll_i2sbi(file_inode(file)), current->pid,
+                                 LUSTRE_FPRIVATE(file), iocb->ki_pos, result,
+                                 READ);
+
        return result;
 }
 
        return result;
 }
 
@@ -1774,6 +1780,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct vvp_io_args *args;
        struct lu_env *env;
        ssize_t rc_tiny = 0, rc_normal;
        struct vvp_io_args *args;
        struct lu_env *env;
        ssize_t rc_tiny = 0, rc_normal;
+       struct file *file = iocb->ki_filp;
        __u16 refcheck;
        bool cached;
        int result;
        __u16 refcheck;
        bool cached;
        int result;
@@ -1802,8 +1809,8 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
         * pages, and we can't do append writes because we can't guarantee the
         * required DLM locks are held to protect file size.
         */
         * pages, and we can't do append writes because we can't guarantee the
         * required DLM locks are held to protect file size.
         */
-       if (ll_sbi_has_tiny_write(ll_i2sbi(file_inode(iocb->ki_filp))) &&
-           !(iocb->ki_filp->f_flags & (O_DIRECT | O_SYNC | O_APPEND)))
+       if (ll_sbi_has_tiny_write(ll_i2sbi(file_inode(file))) &&
+           !(file->f_flags & (O_DIRECT | O_SYNC | O_APPEND)))
                rc_tiny = ll_do_tiny_write(iocb, from);
 
        /* In case of error, go on and try normal write - Only stop if tiny
                rc_tiny = ll_do_tiny_write(iocb, from);
 
        /* In case of error, go on and try normal write - Only stop if tiny
@@ -1820,8 +1827,8 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        args->u.normal.via_iter = from;
        args->u.normal.via_iocb = iocb;
 
        args->u.normal.via_iter = from;
        args->u.normal.via_iocb = iocb;
 
-       rc_normal = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
-                                   &iocb->ki_pos, iov_iter_count(from));
+       rc_normal = ll_file_io_generic(env, args, file, CIT_WRITE,
+                                      &iocb->ki_pos, iov_iter_count(from));
 
        /* On success, combine bytes written. */
        if (rc_tiny >= 0 && rc_normal > 0)
 
        /* On success, combine bytes written. */
        if (rc_tiny >= 0 && rc_normal > 0)
@@ -1834,6 +1841,10 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        cl_env_put(env, &refcheck);
 out:
 
        cl_env_put(env, &refcheck);
 out:
+       if (rc_normal > 0)
+               ll_rw_stats_tally(ll_i2sbi(file_inode(file)), current->pid,
+                                 LUSTRE_FPRIVATE(file), iocb->ki_pos,
+                                 rc_normal, WRITE);
        RETURN(rc_normal);
 }
 
        RETURN(rc_normal);
 }
 
@@ -2011,6 +2022,11 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 
         result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
         cl_env_put(env, &refcheck);
 
         result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
         cl_env_put(env, &refcheck);
+
+       if (result > 0)
+               ll_rw_stats_tally(ll_i2sbi(file_inode(in_file)), current->pid,
+                                 LUSTRE_FPRIVATE(in_file), *ppos, result,
+                                 READ);
         RETURN(result);
 }
 
         RETURN(result);
 }
 
index 1009c70..14ed60d 100644 (file)
@@ -394,6 +394,12 @@ restart:
                 result |= VM_FAULT_LOCKED;
         }
        cfs_restore_sigs(set);
                 result |= VM_FAULT_LOCKED;
         }
        cfs_restore_sigs(set);
+
+       if (vmf->page && result == VM_FAULT_LOCKED)
+               ll_rw_stats_tally(ll_i2sbi(file_inode(vma->vm_file)),
+                                 current->pid, LUSTRE_FPRIVATE(vma->vm_file),
+                                 cl_offset(NULL, vmf->page->index), PAGE_SIZE,
+                                 READ);
         return result;
 }
 
         return result;
 }
 
@@ -455,6 +461,11 @@ static vm_fault_t ll_page_mkwrite(struct vm_area_struct *vma,
                 break;
         }
 
                 break;
         }
 
+       if (result == VM_FAULT_LOCKED)
+               ll_rw_stats_tally(ll_i2sbi(file_inode(vma->vm_file)),
+                                 current->pid, LUSTRE_FPRIVATE(vma->vm_file),
+                                 cl_offset(NULL, vmf->page->index), PAGE_SIZE,
+                                 WRITE);
         return result;
 }
 
         return result;
 }
 
index bfb6949..c676ed6 100644 (file)
@@ -1949,15 +1949,15 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
                 lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
         }
 
                 lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
         }
 
-       for(i = 0; (count >= BIT(LL_HIST_START << i)) &&
-             (i < (LL_HIST_MAX - 1)); i++);
-        if (rw == 0) {
-                io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
-        } else {
-                io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
-        }
+       for (i = 0; (count >= BIT(LL_HIST_START + i)) &&
+            (i < (LL_HIST_MAX - 1)); i++);
+       if (rw == 0) {
+               io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+       } else {
+               io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+       }
        spin_unlock(&sbi->ll_pp_extent_lock);
 
        spin_lock(&sbi->ll_process_lock);
        spin_unlock(&sbi->ll_pp_extent_lock);
 
        spin_lock(&sbi->ll_process_lock);
@@ -2043,7 +2043,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
                if (offset[i].rw_pid != 0)
                        seq_printf(seq,
        for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
                if (offset[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   offset[i].rw_op == READ ? 'R' : 'W',
                                   offset[i].rw_pid,
                                   offset[i].rw_range_start,
                                   offset[i].rw_op == READ ? 'R' : 'W',
                                   offset[i].rw_pid,
                                   offset[i].rw_range_start,
@@ -2057,7 +2057,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
                if (process[i].rw_pid != 0)
                        seq_printf(seq,
        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
                if (process[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   process[i].rw_op == READ ? 'R' : 'W',
                                   process[i].rw_pid,
                                   process[i].rw_range_start,
                                   process[i].rw_op == READ ? 'R' : 'W',
                                   process[i].rw_pid,
                                   process[i].rw_range_start,
index 294cfeb..2c0157f 100644 (file)
@@ -846,8 +846,6 @@ out:
                if (result < cnt)
                        io->ci_continue = 0;
                io->ci_nob += result;
                if (result < cnt)
                        io->ci_continue = 0;
                io->ci_nob += result;
-               ll_rw_stats_tally(ll_i2sbi(inode), current->pid, vio->vui_fd,
-                                 pos, result, READ);
                result = 0;
        }
 
                result = 0;
        }
 
@@ -1144,8 +1142,6 @@ static int vvp_io_write_start(const struct lu_env *env,
 
                if (result < cnt)
                        io->ci_continue = 0;
 
                if (result < cnt)
                        io->ci_continue = 0;
-               ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
-                                 vio->vui_fd, pos, result, WRITE);
                result = 0;
        }
 
                result = 0;
        }
 
index d6e2f40..36ae722 100644 (file)
@@ -11019,6 +11019,75 @@ test_127b() { # bug LU-333
 }
 run_test 127b "verify the llite client stats are sane"
 
 }
 run_test 127b "verify the llite client stats are sane"
 
+test_127c() { # LU-12394
+       [ "$OSTCOUNT" -lt "2" ] && skip_env "needs >= 2 OSTs"
+       local size
+       local bsize
+       local reads
+       local writes
+       local count
+
+       $LCTL set_param llite.*.extents_stats=1
+       stack_trap "$LCTL set_param llite.*.extents_stats=0" EXIT
+
+       # Use two stripes so there is enough space in default config
+       $LFS setstripe -c 2 $DIR/$tfile
+
+       # Extent stats start at 0-4K and go in power of two buckets
+       # LL_HIST_START = 12 --> 2^12 = 4K
+       # We do 3K*2^i, so 3K, 6K, 12K, 24K... hitting each bucket.
+       # We do not do buckets larger than 64 MiB to avoid ENOSPC issues on
+       # small configs
+       for size in 3K 6K 12K 24K 48K 96K 192K 384K 768K 1536K 3M 6M 12M 24M 48M;
+               do
+               # Write and read, 2x each, second time at a non-zero offset
+               dd if=/dev/zero of=$DIR/$tfile bs=$size count=1
+               dd if=/dev/zero of=$DIR/$tfile bs=$size count=1 seek=10
+               dd if=$DIR/$tfile of=/dev/null bs=$size count=1
+               dd if=$DIR/$tfile of=/dev/null bs=$size count=1 skip=10
+               rm -f $DIR/$tfile
+       done
+
+       $LCTL get_param llite.*.extents_stats
+
+       count=2
+       for bsize in 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M;
+               do
+               local bucket=$($LCTL get_param -n llite.*.extents_stats |
+                               grep -m 1 $bsize)
+               reads=$(echo $bucket | awk '{print $5}')
+               writes=$(echo $bucket | awk '{print $9}')
+               [ "$reads" -eq $count ] ||
+                       error "$reads reads in < $bsize bucket, expect $count"
+               [ "$writes" -eq $count ] ||
+                       error "$writes writes in < $bsize bucket, expect $count"
+       done
+
+       # Test mmap write and read
+       $LCTL set_param llite.*.extents_stats=c
+       size=512
+       dd if=/dev/zero of=$DIR/$tfile bs=${size}K count=1
+       $MULTIOP $DIR/$tfile OSMRUc || error "$MULTIOP $DIR/$tfile failed"
+       $MULTIOP $DIR/$tfile OSMWUc || error "$MULTIOP $DIR/$tfile failed"
+
+       $LCTL get_param llite.*.extents_stats
+
+       count=$(((size*1024) / PAGE_SIZE))
+
+       bsize=$((2 * PAGE_SIZE / 1024))K
+
+       bucket=$($LCTL get_param -n llite.*.extents_stats |
+                       grep -m 1 $bsize)
+       reads=$(echo $bucket | awk '{print $5}')
+       writes=$(echo $bucket | awk '{print $9}')
+       # mmap writes fault in the page first, creating an additional read
+       [ "$reads" -eq $((2 * count)) ] ||
+               error "$reads reads in < $bsize bucket, expect $((2 * count))"
+       [ "$writes" -eq $count ] ||
+               error "$writes writes in < $bsize bucket, expect $count"
+}
+run_test 127c "test llite extent stats with regular & mmap i/o"
+
 test_128() { # bug 15212
        touch $DIR/$tfile
        $LFS 2>&1 <<-EOF | tee $TMP/$tfile.log
 test_128() { # bug 15212
        touch $DIR/$tfile
        $LFS 2>&1 <<-EOF | tee $TMP/$tfile.log