From 40d286e11138fc67f5e5d428072d1a1944686b27 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Sun, 5 Dec 2021 21:50:39 -0500 Subject: [PATCH] LU-15317 llite: Add D_IOTRACE In looking in to performance problems, it's very important to be able to trace the I/O patterns from userspace in to Lustre, and also understand the key basics of how Lustre handles that I/O (readahead, RPC generation). This is best done with a dedicated debug flag - No userspace tool can provide all this information, and existing debug flags collect a huge number of unrelated pieces of, well, debug information. The goal is for customers to be able to quickly gather log files of a reasonable size which contain the necessary information and which can easily be interpreted by engineering. This is not possible if the information is spread out across a number of heavyweight debug flags. This is a first pass at adding the flag and the debug required to track basic data I/O. One significant omission in the first patch is RPC generation - I have not decided how best to do that yet. That will be added in a future patch. test-parameters: trivial Signed-off-by: Patrick Farrell Change-Id: I0ed003ec1488e1c267b194c871f64b34f6dc6025 Reviewed-on: https://review.whamcloud.com/45752 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sebastien Buisson Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lnet/include/uapi/linux/lnet/libcfs_debug.h | 3 ++- lustre/llite/file.c | 20 ++++++++++++++++---- lustre/llite/llite_internal.h | 1 + lustre/llite/llite_mmap.c | 11 +++++++++-- lustre/llite/rw.c | 10 ++++++++-- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/lnet/include/uapi/linux/lnet/libcfs_debug.h b/lnet/include/uapi/linux/lnet/libcfs_debug.h index 3358506..28c89b6 100644 --- a/lnet/include/uapi/linux/lnet/libcfs_debug.h +++ b/lnet/include/uapi/linux/lnet/libcfs_debug.h @@ -105,6 +105,7 @@ struct ptldebug_header { #define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ #define D_INODE 0x00000002 #define D_SUPER 0x00000004 +#define D_IOTRACE 0x00000008 /* simple, low overhead io tracing */ #define D_MALLOC 0x00000010 /* print malloc, free information */ #define D_CACHE 0x00000020 /* cache-related items */ #define D_INFO 0x00000040 /* general information */ @@ -135,7 +136,7 @@ struct ptldebug_header { #define D_LAYOUT 0x80000000 #define LIBCFS_DEBUG_MASKS_NAMES { \ - "trace", "inode", "super", "tty", "malloc", "cache", "info", \ + "trace", "inode", "super", "iotrace", "malloc", "cache", "info",\ "ioctl", "neterror", "net", "warning", "buffs", "other", \ "dentry", "nettrace", "page", "dlmtrace", "error", "emerg", \ "ha", "rpctrace", "vfstrace", "reada", "mmap", "config", \ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index a970888..1fe29db 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1940,8 +1940,15 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ktime_t kstart = ktime_get(); bool cached; + ENTRY; + + CDEBUG(D_VFSTRACE|D_IOTRACE, "file %s:"DFID", ppos: %lld, count: %zu\n", + file_dentry(file)->d_name.name, + PFID(ll_inode2fid(file_inode(file))), iocb->ki_pos, + iov_iter_count(to)); + if (!iov_iter_count(to)) - return 0; + RETURN(0); /** * Currently when PCC read failed, we do not fall back to the @@ -1966,7 +1973,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) env = cl_env_get(&refcheck); if (IS_ERR(env)) - return PTR_ERR(env); + RETURN(PTR_ERR(env)); args = ll_env_args(env); args->u.normal.via_iter = to; @@ -1989,7 +1996,7 @@ out: ktime_us_delta(ktime_get(), kstart)); } - return result; + RETURN(result); } /** @@ -2065,6 +2072,11 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ENTRY; + CDEBUG(D_VFSTRACE|D_IOTRACE, "file %s:"DFID", ppos: %lld, count: %zu\n", + file_dentry(file)->d_name.name, + PFID(ll_inode2fid(file_inode(file))), iocb->ki_pos, + iov_iter_count(from)); + if (!iov_iter_count(from)) GOTO(out, rc_normal = 0); @@ -2099,7 +2111,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) env = cl_env_get(&refcheck); if (IS_ERR(env)) - return PTR_ERR(env); + RETURN(PTR_ERR(env)); args = ll_env_args(env); args->u.normal.via_iter = from; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 6639f4c..bc3aff9 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -880,6 +880,7 @@ struct ll_readahead_work { struct file *lrw_file; pgoff_t lrw_start_idx; pgoff_t lrw_end_idx; + pid_t lrw_user_pid; /* async worker to handler read */ struct work_struct lrw_readahead_work; diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index 203a56c..d976c25 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -370,9 +370,10 @@ static vm_fault_t ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (cached) goto out; - CDEBUG(D_MMAP, DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx\n", + CDEBUG(D_MMAP|D_IOTRACE, + DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx idx=%lu\n", PFID(&ll_i2info(file_inode(vma->vm_file))->lli_fid), - vma, vma->vm_start, vma->vm_end, vma->vm_flags); + vma, vma->vm_start, vma->vm_end, vma->vm_flags, vmf->pgoff); /* Only SIGKILL and SIGTERM is allowed for fault/nopage/mkwrite * so that it can be killed by admin but not cause segfault by @@ -441,6 +442,12 @@ static vm_fault_t ll_page_mkwrite(struct vm_area_struct *vma, ktime_t kstart = ktime_get(); vm_fault_t result; + CDEBUG(D_MMAP|D_IOTRACE, + DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx idx=%lu\n", + PFID(&ll_i2info(file_inode(vma->vm_file))->lli_fid), + vma, vma->vm_start, vma->vm_end, vma->vm_flags, + vmf->page->index); + result = pcc_page_mkwrite(vma, vmf, &cached); if (cached) goto out; diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 1af37f2..86436b7 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -591,6 +591,11 @@ static void ll_readahead_handle_work(struct work_struct *wq) inode = file_inode(file); sbi = ll_i2sbi(inode); + CDEBUG(D_READA|D_IOTRACE, + "%s: async ra from %lu to %lu triggered by user pid %d\n", + file_dentry(file)->d_name.name, work->lrw_start_idx, + work->lrw_end_idx, work->lrw_user_pid); + env = cl_env_alloc(&refcheck, LCT_NOREF); if (IS_ERR(env)) GOTO(out_free_work, rc = PTR_ERR(env)); @@ -1301,7 +1306,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode, spin_lock(&ras->ras_lock); if (!hit) - CDEBUG(D_READA, DFID " pages at %lu miss.\n", + CDEBUG(D_READA|D_IOTRACE, DFID " pages at %lu miss.\n", PFID(ll_inode2fid(inode)), index); ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS); @@ -1670,7 +1675,7 @@ int ll_io_read_page(const struct lu_env *env, struct cl_io *io, skip_index = vvp_index(vpg); rc2 = ll_readahead(env, io, &queue->c2_qin, ras, uptodate, file, skip_index); - CDEBUG(D_READA, DFID " %d pages read ahead at %lu\n", + CDEBUG(D_READA|D_IOTRACE, DFID " %d pages read ahead at %lu\n", PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg)); } else if (vvp_index(vpg) == io_start_index && io_end_index - io_start_index > 0) { @@ -1769,6 +1774,7 @@ static int kickoff_async_readahead(struct file *file, unsigned long pages) lrw->lrw_file = get_file(file); lrw->lrw_start_idx = start_idx; lrw->lrw_end_idx = end_idx; + lrw->lrw_user_pid = current->pid; spin_lock(&ras->ras_lock); ras->ras_next_readahead_idx = end_idx + 1; ras->ras_async_last_readpage_idx = start_idx; -- 1.8.3.1