Whamcloud - gitweb
LU-15317 llite: Add D_IOTRACE 52/45752/6
authorPatrick Farrell <pfarrell@whamcloud.com>
Mon, 6 Dec 2021 02:50:39 +0000 (21:50 -0500)
committerOleg Drokin <green@whamcloud.com>
Thu, 6 Jan 2022 22:03:26 +0000 (22:03 +0000)
In looking in to performance problems, it's very important
to be able to trace the I/O patterns from userspace in to
Lustre, and also understand the key basics of how Lustre
handles that I/O (readahead, RPC generation).

This is best done with a dedicated debug flag - No
userspace tool can provide all this information, and
existing debug flags collect a huge number of unrelated
pieces of, well, debug information.

The goal is for customers to be able to quickly gather log
files of a reasonable size which contain the necessary
information and which can easily be interpreted by
engineering.  This is not possible if the information is
spread out across a number of heavyweight debug flags.

This is a first pass at adding the flag and the debug
required to track basic data I/O.  One significant
omission in the first patch is RPC generation - I have not
decided how best to do that yet.  That will be added in a
future patch.

test-parameters: trivial

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I0ed003ec1488e1c267b194c871f64b34f6dc6025
Reviewed-on: https://review.whamcloud.com/45752
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/uapi/linux/lnet/libcfs_debug.h
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_mmap.c
lustre/llite/rw.c

index 3358506..28c89b6 100644 (file)
@@ -105,6 +105,7 @@ struct ptldebug_header {
 #define D_TRACE         0x00000001 /* ENTRY/EXIT markers */
 #define D_INODE         0x00000002
 #define D_SUPER         0x00000004
+#define D_IOTRACE      0x00000008 /* simple, low overhead io tracing */
 #define D_MALLOC        0x00000010 /* print malloc, free information */
 #define D_CACHE         0x00000020 /* cache-related items */
 #define D_INFO          0x00000040 /* general information */
@@ -135,7 +136,7 @@ struct ptldebug_header {
 #define D_LAYOUT        0x80000000
 
 #define LIBCFS_DEBUG_MASKS_NAMES {                                     \
-       "trace", "inode", "super", "tty", "malloc", "cache", "info",    \
+       "trace", "inode", "super", "iotrace", "malloc", "cache", "info",\
        "ioctl", "neterror", "net", "warning", "buffs", "other",        \
        "dentry", "nettrace", "page", "dlmtrace", "error", "emerg",     \
        "ha", "rpctrace", "vfstrace", "reada", "mmap", "config",        \
index a970888..1fe29db 100644 (file)
@@ -1940,8 +1940,15 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        ktime_t kstart = ktime_get();
        bool cached;
 
+       ENTRY;
+
+       CDEBUG(D_VFSTRACE|D_IOTRACE, "file %s:"DFID", ppos: %lld, count: %zu\n",
+              file_dentry(file)->d_name.name,
+              PFID(ll_inode2fid(file_inode(file))), iocb->ki_pos,
+              iov_iter_count(to));
+
        if (!iov_iter_count(to))
-               return 0;
+               RETURN(0);
 
        /**
         * Currently when PCC read failed, we do not fall back to the
@@ -1966,7 +1973,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
-               return PTR_ERR(env);
+               RETURN(PTR_ERR(env));
 
        args = ll_env_args(env);
        args->u.normal.via_iter = to;
@@ -1989,7 +1996,7 @@ out:
                                   ktime_us_delta(ktime_get(), kstart));
        }
 
-       return result;
+       RETURN(result);
 }
 
 /**
@@ -2065,6 +2072,11 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        ENTRY;
 
+       CDEBUG(D_VFSTRACE|D_IOTRACE, "file %s:"DFID", ppos: %lld, count: %zu\n",
+              file_dentry(file)->d_name.name,
+              PFID(ll_inode2fid(file_inode(file))), iocb->ki_pos,
+              iov_iter_count(from));
+
        if (!iov_iter_count(from))
                GOTO(out, rc_normal = 0);
 
@@ -2099,7 +2111,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
-               return PTR_ERR(env);
+               RETURN(PTR_ERR(env));
 
        args = ll_env_args(env);
        args->u.normal.via_iter = from;
index 6639f4c..bc3aff9 100644 (file)
@@ -880,6 +880,7 @@ struct ll_readahead_work {
        struct file                     *lrw_file;
        pgoff_t                          lrw_start_idx;
        pgoff_t                          lrw_end_idx;
+       pid_t                            lrw_user_pid;
 
        /* async worker to handler read */
        struct work_struct               lrw_readahead_work;
index 203a56c..d976c25 100644 (file)
@@ -370,9 +370,10 @@ static vm_fault_t ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        if (cached)
                goto out;
 
-       CDEBUG(D_MMAP, DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx\n",
+       CDEBUG(D_MMAP|D_IOTRACE,
+              DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx idx=%lu\n",
               PFID(&ll_i2info(file_inode(vma->vm_file))->lli_fid),
-              vma, vma->vm_start, vma->vm_end, vma->vm_flags);
+              vma, vma->vm_start, vma->vm_end, vma->vm_flags, vmf->pgoff);
 
        /* Only SIGKILL and SIGTERM is allowed for fault/nopage/mkwrite
         * so that it can be killed by admin but not cause segfault by
@@ -441,6 +442,12 @@ static vm_fault_t ll_page_mkwrite(struct vm_area_struct *vma,
        ktime_t kstart = ktime_get();
        vm_fault_t result;
 
+       CDEBUG(D_MMAP|D_IOTRACE,
+              DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx idx=%lu\n",
+              PFID(&ll_i2info(file_inode(vma->vm_file))->lli_fid),
+              vma, vma->vm_start, vma->vm_end, vma->vm_flags,
+              vmf->page->index);
+
        result = pcc_page_mkwrite(vma, vmf, &cached);
        if (cached)
                goto out;
index 1af37f2..86436b7 100644 (file)
@@ -591,6 +591,11 @@ static void ll_readahead_handle_work(struct work_struct *wq)
        inode = file_inode(file);
        sbi = ll_i2sbi(inode);
 
+       CDEBUG(D_READA|D_IOTRACE,
+              "%s: async ra from %lu to %lu triggered by user pid %d\n",
+              file_dentry(file)->d_name.name, work->lrw_start_idx,
+              work->lrw_end_idx, work->lrw_user_pid);
+
        env = cl_env_alloc(&refcheck, LCT_NOREF);
        if (IS_ERR(env))
                GOTO(out_free_work, rc = PTR_ERR(env));
@@ -1301,7 +1306,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
        spin_lock(&ras->ras_lock);
 
        if (!hit)
-               CDEBUG(D_READA, DFID " pages at %lu miss.\n",
+               CDEBUG(D_READA|D_IOTRACE, DFID " pages at %lu miss.\n",
                       PFID(ll_inode2fid(inode)), index);
        ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
 
@@ -1670,7 +1675,7 @@ int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
                        skip_index = vvp_index(vpg);
                rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
                                   uptodate, file, skip_index);
-               CDEBUG(D_READA, DFID " %d pages read ahead at %lu\n",
+               CDEBUG(D_READA|D_IOTRACE, DFID " %d pages read ahead at %lu\n",
                       PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
        } else if (vvp_index(vpg) == io_start_index &&
                   io_end_index - io_start_index > 0) {
@@ -1769,6 +1774,7 @@ static int kickoff_async_readahead(struct file *file, unsigned long pages)
                lrw->lrw_file = get_file(file);
                lrw->lrw_start_idx = start_idx;
                lrw->lrw_end_idx = end_idx;
+               lrw->lrw_user_pid = current->pid;
                spin_lock(&ras->ras_lock);
                ras->ras_next_readahead_idx = end_idx + 1;
                ras->ras_async_last_readpage_idx = start_idx;