From d31a4dad4e698c537dff3d018fd67f196b2b293f Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Tue, 11 Jun 2019 14:54:20 -0400 Subject: [PATCH] LU-12394 llite: Fix extents_stats Patch 32517 from LU-8066 changed: (1 << LL_HIST_START << i) To: BIT(LL_HIST_START << i) But these are not equivalent because this changes the order of operations. The earlier one does the operations in this order: (1 << LL_HIST_START) << i The new one is this order: 1 << (LL_HIST_START << i) Which is quite different, as it's left shifting LL_HIST_START directly, and LL_HIST_START is a number of bits. The goal is really just to start with BIT(LL_HIST_START) and left shift by one (going from 4K, to 8K, etc) each time, so just use: BIT(LL_HIST_START + i) The result of this was that all i/os over 8K were placed in the 4K-8K stat bucket, because the loop exited early. Also add mmap'ed reads & writes to extents_stats. Add test for extents_stats. Fixes: adb5aca3d673 ("LU-8066 llite: Move all remaining procfs entries to debugfs") Signed-off-by: Patrick Farrell Change-Id: Iab4dc097234d411601a18d501075df45791d1138 Reviewed-on: https://review.whamcloud.com/35075 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/llite/file.c | 28 +++++++++++++++---- lustre/llite/llite_mmap.c | 11 ++++++++ lustre/llite/lproc_llite.c | 22 +++++++-------- lustre/llite/vvp_io.c | 4 --- lustre/tests/sanity.sh | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 21 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index dad39ac..6760ae0 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1658,6 +1658,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct lu_env *env; struct vvp_io_args *args; + struct file *file = iocb->ki_filp; ssize_t result; ssize_t rc2; __u16 refcheck; @@ -1681,7 +1682,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, 
struct iov_iter *to) if (cached) return result; - ll_ras_enter(iocb->ki_filp); + ll_ras_enter(file); result = ll_do_fast_read(iocb, to); if (result < 0 || iov_iter_count(to) == 0) @@ -1695,7 +1696,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; - rc2 = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ, + rc2 = ll_file_io_generic(env, args, file, CIT_READ, &iocb->ki_pos, iov_iter_count(to)); if (rc2 > 0) result += rc2; @@ -1704,6 +1705,11 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) cl_env_put(env, &refcheck); out: + if (result > 0) + ll_rw_stats_tally(ll_i2sbi(file_inode(file)), current->pid, + LUSTRE_FPRIVATE(file), iocb->ki_pos, result, + READ); + return result; } @@ -1774,6 +1780,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct vvp_io_args *args; struct lu_env *env; ssize_t rc_tiny = 0, rc_normal; + struct file *file = iocb->ki_filp; __u16 refcheck; bool cached; int result; @@ -1802,8 +1809,8 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) * pages, and we can't do append writes because we can't guarantee the * required DLM locks are held to protect file size. 
*/ - if (ll_sbi_has_tiny_write(ll_i2sbi(file_inode(iocb->ki_filp))) && - !(iocb->ki_filp->f_flags & (O_DIRECT | O_SYNC | O_APPEND))) + if (ll_sbi_has_tiny_write(ll_i2sbi(file_inode(file))) && + !(file->f_flags & (O_DIRECT | O_SYNC | O_APPEND))) rc_tiny = ll_do_tiny_write(iocb, from); /* In case of error, go on and try normal write - Only stop if tiny @@ -1820,8 +1827,8 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) args->u.normal.via_iter = from; args->u.normal.via_iocb = iocb; - rc_normal = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE, - &iocb->ki_pos, iov_iter_count(from)); + rc_normal = ll_file_io_generic(env, args, file, CIT_WRITE, + &iocb->ki_pos, iov_iter_count(from)); /* On success, combine bytes written. */ if (rc_tiny >= 0 && rc_normal > 0) @@ -1834,6 +1841,10 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) cl_env_put(env, &refcheck); out: + if (rc_normal > 0) + ll_rw_stats_tally(ll_i2sbi(file_inode(file)), current->pid, + LUSTRE_FPRIVATE(file), iocb->ki_pos, + rc_normal, WRITE); RETURN(rc_normal); } @@ -2011,6 +2022,11 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count); cl_env_put(env, &refcheck); + + if (result > 0) + ll_rw_stats_tally(ll_i2sbi(file_inode(in_file)), current->pid, + LUSTRE_FPRIVATE(in_file), *ppos, result, + READ); RETURN(result); } diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index 1009c70..14ed60d 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -394,6 +394,12 @@ restart: result |= VM_FAULT_LOCKED; } cfs_restore_sigs(set); + + if (vmf->page && result == VM_FAULT_LOCKED) + ll_rw_stats_tally(ll_i2sbi(file_inode(vma->vm_file)), + current->pid, LUSTRE_FPRIVATE(vma->vm_file), + cl_offset(NULL, vmf->page->index), PAGE_SIZE, + READ); return result; } @@ -455,6 +461,11 @@ static vm_fault_t ll_page_mkwrite(struct vm_area_struct *vma, 
break; } + if (result == VM_FAULT_LOCKED) + ll_rw_stats_tally(ll_i2sbi(file_inode(vma->vm_file)), + current->pid, LUSTRE_FPRIVATE(vma->vm_file), + cl_offset(NULL, vmf->page->index), PAGE_SIZE, + WRITE); return result; } diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index bfb6949..c676ed6 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -1949,15 +1949,15 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist); } - for(i = 0; (count >= BIT(LL_HIST_START << i)) && - (i < (LL_HIST_MAX - 1)); i++); - if (rw == 0) { - io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++; - io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++; - } else { - io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++; - io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++; - } + for (i = 0; (count >= BIT(LL_HIST_START + i)) && + (i < (LL_HIST_MAX - 1)); i++); + if (rw == 0) { + io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++; + io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++; + } else { + io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++; + io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++; + } spin_unlock(&sbi->ll_pp_extent_lock); spin_lock(&sbi->ll_process_lock); @@ -2043,7 +2043,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v) for (i = 0; i < LL_OFFSET_HIST_MAX; i++) { if (offset[i].rw_pid != 0) seq_printf(seq, - "%3c %10d %14llu %14llu %17lu %17lu %14llu\n", + "%3c %10d %14llu %14llu %17lu %17lu %14lld\n", offset[i].rw_op == READ ? 
'R' : 'W', offset[i].rw_pid, offset[i].rw_range_start, @@ -2057,7 +2057,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v) for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { if (process[i].rw_pid != 0) seq_printf(seq, - "%3c %10d %14llu %14llu %17lu %17lu %14llu\n", + "%3c %10d %14llu %14llu %17lu %17lu %14lld\n", process[i].rw_op == READ ? 'R' : 'W', process[i].rw_pid, process[i].rw_range_start, diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 294cfeb..2c0157f 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -846,8 +846,6 @@ out: if (result < cnt) io->ci_continue = 0; io->ci_nob += result; - ll_rw_stats_tally(ll_i2sbi(inode), current->pid, vio->vui_fd, - pos, result, READ); result = 0; } @@ -1144,8 +1142,6 @@ static int vvp_io_write_start(const struct lu_env *env, if (result < cnt) io->ci_continue = 0; - ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - vio->vui_fd, pos, result, WRITE); result = 0; } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d6e2f40..36ae722 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -11019,6 +11019,75 @@ test_127b() { # bug LU-333 } run_test 127b "verify the llite client stats are sane" +test_127c() { # LU-12394 + [ "$OSTCOUNT" -lt "2" ] && skip_env "needs >= 2 OSTs" + local size + local bsize + local reads + local writes + local count + + $LCTL set_param llite.*.extents_stats=1 + stack_trap "$LCTL set_param llite.*.extents_stats=0" EXIT + + # Use two stripes so there is enough space in default config + $LFS setstripe -c 2 $DIR/$tfile + + # Extent stats start at 0-4K and go in power of two buckets + # LL_HIST_START = 12 --> 2^12 = 4K + # We do 3K*2^i, so 3K, 6K, 12K, 24K... hitting each bucket. 
+ # We do not do buckets larger than 64 MiB to avoid ENOSPC issues on + # small configs + for size in 3K 6K 12K 24K 48K 96K 192K 384K 768K 1536K 3M 6M 12M 24M 48M; + do + # Write and read, 2x each, second time at a non-zero offset + dd if=/dev/zero of=$DIR/$tfile bs=$size count=1 + dd if=/dev/zero of=$DIR/$tfile bs=$size count=1 seek=10 + dd if=$DIR/$tfile of=/dev/null bs=$size count=1 + dd if=$DIR/$tfile of=/dev/null bs=$size count=1 seek=10 + rm -f $DIR/$tfile + done + + $LCTL get_param llite.*.extents_stats + + count=2 + for bsize in 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M; + do + local bucket=$($LCTL get_param -n llite.*.extents_stats | + grep -m 1 $bsize) + reads=$(echo $bucket | awk '{print $5}') + writes=$(echo $bucket | awk '{print $9}') + [ "$reads" -eq $count ] || + error "$reads reads in < $bsize bucket, expect $count" + [ "$writes" -eq $count ] || + error "$writes writes in < $bsize bucket, expect $count" + done + + # Test mmap write and read + $LCTL set_param llite.*.extents_stats=c + size=512 + dd if=/dev/zero of=$DIR/$tfile bs=${size}K count=1 + $MULTIOP $DIR/$tfile OSMRUc || error "$MULTIOP $DIR/$tfile failed" + $MULTIOP $DIR/$tfile OSMWUc || error "$MULTIOP $DIR/$tfile failed" + + $LCTL get_param llite.*.extents_stats + + count=$(((size*1024) / PAGE_SIZE)) + + bsize=$((2 * PAGE_SIZE / 1024))K + + bucket=$($LCTL get_param -n llite.*.extents_stats | + grep -m 1 $bsize) + reads=$(echo $bucket | awk '{print $5}') + writes=$(echo $bucket | awk '{print $9}') + # mmap writes fault in the page first, creating an additional read + [ "$reads" -eq $((2 * count)) ] || + error "$reads reads in < $bsize bucket, expect $count" + [ "$writes" -eq $count ] || + error "$writes writes in < $bsize bucket, expect $count" +} +run_test 127c "test llite extent stats with regular & mmap i/o" + test_128() { # bug 15212 touch $DIR/$tfile $LFS 2>&1 <<-EOF | tee $TMP/$tfile.log -- 1.8.3.1