Whamcloud - gitweb
LU-12631 llite: report latency for filesystem ops
[fs/lustre-release.git] / lustre / llite / lproc_llite.c
index ed66cd8..081316e 100644 (file)
@@ -351,11 +351,11 @@ ll_max_readahead_mb_seq_write(struct file *file, const char __user *buffer,
                return rc;
 
        pages_number = round_up(ra_max_mb, 1024 * 1024) >> PAGE_SHIFT;
-       if (pages_number < 0 || pages_number > totalram_pages / 2) {
+       if (pages_number < 0 || pages_number > cfs_totalram_pages() / 2) {
                /* 1/2 of RAM */
                CERROR("%s: can't set max_readahead_mb=%llu > %luMB\n",
                       sbi->ll_fsname, PAGES_TO_MiB(pages_number),
-                      PAGES_TO_MiB(totalram_pages));
+                      PAGES_TO_MiB(cfs_totalram_pages()));
                return -ERANGE;
        }
 
@@ -521,10 +521,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
        pages_number >>= PAGE_SHIFT;
 
-       if (pages_number < 0 || pages_number > totalram_pages) {
+       if (pages_number < 0 || pages_number > cfs_totalram_pages()) {
                CERROR("%s: can't set max cache more than %lu MB\n",
                       sbi->ll_fsname,
-                      PAGES_TO_MiB(totalram_pages));
+                      PAGES_TO_MiB(cfs_totalram_pages()));
                RETURN(-ERANGE);
        }
        /* Allow enough cache so clients can make well-formed RPCs */
@@ -875,6 +875,36 @@ static ssize_t lazystatfs_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(lazystatfs);
 
+/*
+ * kobject attribute "show" handler for statfs_max_age.
+ *
+ * Formats the ll_statfs_max_age field of the ll_sb_info that embeds @kobj
+ * as an unsigned decimal followed by a newline, writing at most PAGE_SIZE
+ * bytes into @buf.
+ *
+ * Returns the value returned by snprintf().
+ */
+static ssize_t statfs_max_age_show(struct kobject *kobj, struct attribute *attr,
+                                  char *buf)
+{
+       struct ll_sb_info *sbi;
+       unsigned int max_age;
+
+       sbi = container_of(kobj, struct ll_sb_info, ll_kset.kobj);
+       max_age = sbi->ll_statfs_max_age;
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", max_age);
+}
+
+/*
+ * kobject attribute "store" handler for statfs_max_age.
+ *
+ * Parses @buffer as an unsigned base-10 integer and, provided it does not
+ * exceed OBD_STATFS_CACHE_MAX_AGE, saves it in ll_statfs_max_age of the
+ * ll_sb_info that embeds @kobj.
+ *
+ * Returns @count on success, the kstrtouint() error code when parsing
+ * fails, or -EINVAL when the value is above the limit.
+ */
+static ssize_t statfs_max_age_store(struct kobject *kobj,
+                                   struct attribute *attr, const char *buffer,
+                                   size_t count)
+{
+       struct ll_sb_info *sbi;
+       unsigned int max_age;
+       int rc;
+
+       rc = kstrtouint(buffer, 10, &max_age);
+       if (rc != 0)
+               return rc;
+
+       if (max_age > OBD_STATFS_CACHE_MAX_AGE)
+               return -EINVAL;
+
+       sbi = container_of(kobj, struct ll_sb_info, ll_kset.kobj);
+       sbi->ll_statfs_max_age = max_age;
+
+       return count;
+}
+LUSTRE_RW_ATTR(statfs_max_age);
+
 static ssize_t max_easize_show(struct kobject *kobj,
                               struct attribute *attr,
                               char *buf)
@@ -888,7 +918,9 @@ static ssize_t max_easize_show(struct kobject *kobj,
        if (rc)
                return rc;
 
-       return sprintf(buf, "%u\n", ealen);
+       /* Limit xattr size returned to userspace based on kernel maximum */
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       ealen > XATTR_SIZE_MAX ? XATTR_SIZE_MAX : ealen);
 }
 LUSTRE_RO_ATTR(max_easize);
 
@@ -916,7 +948,9 @@ static ssize_t default_easize_show(struct kobject *kobj,
        if (rc)
                return rc;
 
-       return sprintf(buf, "%u\n", ealen);
+       /* Limit xattr size returned to userspace based on kernel maximum */
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       ealen > XATTR_SIZE_MAX ? XATTR_SIZE_MAX : ealen);
 }
 
 /**
@@ -1054,6 +1088,87 @@ static ssize_t tiny_write_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(tiny_write);
 
+/*
+ * kobject attribute "show" handler for max_read_ahead_async_active.
+ *
+ * Prints ll_ra_info.ra_async_max_active of the ll_sb_info that embeds
+ * @kobj as an unsigned decimal followed by a newline, writing at most
+ * PAGE_SIZE bytes into @buf.
+ *
+ * Returns the value returned by snprintf().
+ */
+static ssize_t max_read_ahead_async_active_show(struct kobject *kobj,
+                                              struct attribute *attr,
+                                              char *buf)
+{
+       struct ll_sb_info *sbi;
+
+       sbi = container_of(kobj, struct ll_sb_info, ll_kset.kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       sbi->ll_ra_info.ra_async_max_active);
+}
+
+/*
+ * kobject attribute "store" handler for max_read_ahead_async_active.
+ *
+ * Parses @buffer as an unsigned base-10 integer. Values outside
+ * [1, WQ_UNBOUND_MAX_ACTIVE] are reported through CERROR() and rejected.
+ * An accepted value is saved in ll_ra_info.ra_async_max_active and pushed
+ * to the ll_readahead_wq workqueue via workqueue_set_max_active().
+ *
+ * Returns @count on success, the kstrtouint() error code when parsing
+ * fails, or -ERANGE when the value is out of range.
+ */
+static ssize_t max_read_ahead_async_active_store(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               const char *buffer,
+                                               size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       const char *relation = NULL;
+       unsigned int limit = 0;
+       unsigned int max_active;
+       int rc;
+
+       rc = kstrtouint(buffer, 10, &max_active);
+       if (rc != 0)
+               return rc;
+
+       /* work out which bound, if any, the requested value violates */
+       if (max_active < 1) {
+               relation = "smaller";
+               limit = 1;
+       } else if (max_active > WQ_UNBOUND_MAX_ACTIVE) {
+               relation = "larger";
+               limit = WQ_UNBOUND_MAX_ACTIVE;
+       }
+
+       if (relation != NULL) {
+               CERROR("%s: cannot set max_read_ahead_async_active=%u %s than %u\n",
+                      sbi->ll_fsname, max_active, relation, limit);
+               return -ERANGE;
+       }
+
+       sbi->ll_ra_info.ra_async_max_active = max_active;
+       workqueue_set_max_active(sbi->ll_ra_info.ll_readahead_wq, max_active);
+
+       return count;
+}
+LUSTRE_RW_ATTR(max_read_ahead_async_active);
+
+/*
+ * kobject attribute "show" handler for read_ahead_async_file_threshold_mb.
+ *
+ * Converts ll_ra_info.ra_async_pages_per_file_threshold with
+ * PAGES_TO_MiB() and prints the result as an unsigned long followed by a
+ * newline, writing at most PAGE_SIZE bytes into @buf.
+ *
+ * Returns the value returned by snprintf().
+ */
+static ssize_t read_ahead_async_file_threshold_mb_show(struct kobject *kobj,
+                                                      struct attribute *attr,
+                                                      char *buf)
+{
+       struct ll_sb_info *sbi;
+       unsigned long threshold_mb;
+
+       sbi = container_of(kobj, struct ll_sb_info, ll_kset.kobj);
+       threshold_mb =
+            PAGES_TO_MiB(sbi->ll_ra_info.ra_async_pages_per_file_threshold);
+
+       return snprintf(buf, PAGE_SIZE, "%lu\n", threshold_mb);
+}
+
+/*
+ * kobject attribute "store" handler for read_ahead_async_file_threshold_mb.
+ *
+ * Parses @buffer as an unsigned base-10 number of MiB and stores it, in
+ * pages, in ll_ra_info.ra_async_pages_per_file_threshold. The value may
+ * not exceed the per-file readahead limit ll_ra_info.ra_max_pages_per_file.
+ *
+ * Returns @count on success, the kstrtoul() error code when parsing
+ * fails, or -ERANGE when the threshold is above the per-file limit.
+ */
+static ssize_t
+read_ahead_async_file_threshold_mb_store(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        const char *buffer, size_t count)
+{
+       unsigned long threshold_mb;
+       unsigned long max_ra_per_file;
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       int rc;
+
+       rc = kstrtoul(buffer, 10, &threshold_mb);
+       if (rc)
+               return rc;
+
+       max_ra_per_file = sbi->ll_ra_info.ra_max_pages_per_file;
+       /*
+        * Range-check in MiB, before converting to pages: a huge input
+        * would otherwise wrap inside MiB_TO_PAGES() and could slip under
+        * the limit. The value is unsigned, so no "< 0" test is needed.
+        * NOTE(review): assumes MiB_TO_PAGES()/PAGES_TO_MiB() are plain
+        * shifts by (20 - PAGE_SHIFT), which makes this comparison
+        * equivalent to comparing page counts -- confirm.
+        */
+       if (threshold_mb > PAGES_TO_MiB(max_ra_per_file)) {
+               CERROR("%s: can't set read_ahead_async_file_threshold_mb=%lu > "
+                      "max_read_ahead_per_file_mb=%lu\n", sbi->ll_fsname,
+                      threshold_mb,
+                      PAGES_TO_MiB(max_ra_per_file));
+               return -ERANGE;
+       }
+       sbi->ll_ra_info.ra_async_pages_per_file_threshold =
+               MiB_TO_PAGES(threshold_mb);
+
+       return count;
+}
+LUSTRE_RW_ATTR(read_ahead_async_file_threshold_mb);
+
 static ssize_t fast_read_show(struct kobject *kobj,
                              struct attribute *attr,
                              char *buf)
@@ -1276,7 +1391,7 @@ static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
        struct root_squash_info *squash = &sbi->ll_squash;
        int len;
 
-       down_read(&squash->rsi_sem);
+       spin_lock(&squash->rsi_lock);
        if (!list_empty(&squash->rsi_nosquash_nids)) {
                len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
                                        &squash->rsi_nosquash_nids);
@@ -1285,7 +1400,7 @@ static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
        } else {
                seq_puts(m, "NONE\n");
        }
-       up_read(&squash->rsi_sem);
+       spin_unlock(&squash->rsi_lock);
 
        return 0;
 }
@@ -1396,6 +1511,7 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_statahead_max.attr,
        &lustre_attr_statahead_agl.attr,
        &lustre_attr_lazystatfs.attr,
+       &lustre_attr_statfs_max_age.attr,
        &lustre_attr_max_easize.attr,
        &lustre_attr_default_easize.attr,
        &lustre_attr_xattr_cache.attr,
@@ -1404,6 +1520,8 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_file_heat.attr,
        &lustre_attr_heat_decay_percentage.attr,
        &lustre_attr_heat_period_second.attr,
+       &lustre_attr_max_read_ahead_async_active.attr,
+       &lustre_attr_read_ahead_async_file_threshold_mb.attr,
        NULL,
 };
 
@@ -1420,68 +1538,66 @@ static struct kobj_type sbi_ktype = {
        .release        = sbi_kobj_release,
 };
 
+/*
+ * Counter type flags shared by the latency entries of llite_opcode_table:
+ * the LPROCFS_TYPE_USEC unit combined with the AVGMINMAX and STDDEV
+ * counter flags.
+ * NOTE(review): presumably these counters accumulate elapsed microseconds
+ * per operation -- confirm against the ll_stats_ops_tally() callers.
+ */
+#define LPROCFS_TYPE_LATENCY \
+       (LPROCFS_TYPE_USEC | LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV)
 static const struct llite_file_opcode {
-        __u32       opcode;
-        __u32       type;
-        const char *opname;
+       __u32           opcode;
+       __u32           type;
+       const char      *opname;
 } llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
-        /* file operation */
-        { LPROC_LL_DIRTY_HITS,     LPROCFS_TYPE_REGS, "dirty_pages_hits" },
-        { LPROC_LL_DIRTY_MISSES,   LPROCFS_TYPE_REGS, "dirty_pages_misses" },
-        { LPROC_LL_READ_BYTES,     LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
-                                   "read_bytes" },
-        { LPROC_LL_WRITE_BYTES,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
-                                   "write_bytes" },
-        { LPROC_LL_BRW_READ,       LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
-                                   "brw_read" },
-        { LPROC_LL_BRW_WRITE,      LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
-                                   "brw_write" },
-        { LPROC_LL_IOCTL,          LPROCFS_TYPE_REGS, "ioctl" },
-        { LPROC_LL_OPEN,           LPROCFS_TYPE_REGS, "open" },
-        { LPROC_LL_RELEASE,        LPROCFS_TYPE_REGS, "close" },
-        { LPROC_LL_MAP,            LPROCFS_TYPE_REGS, "mmap" },
-       { LPROC_LL_FAULT,          LPROCFS_TYPE_REGS, "page_fault" },
-       { LPROC_LL_MKWRITE,        LPROCFS_TYPE_REGS, "page_mkwrite" },
-        { LPROC_LL_LLSEEK,         LPROCFS_TYPE_REGS, "seek" },
-        { LPROC_LL_FSYNC,          LPROCFS_TYPE_REGS, "fsync" },
-        { LPROC_LL_READDIR,        LPROCFS_TYPE_REGS, "readdir" },
-        /* inode operation */
-        { LPROC_LL_SETATTR,        LPROCFS_TYPE_REGS, "setattr" },
-        { LPROC_LL_TRUNC,          LPROCFS_TYPE_REGS, "truncate" },
-        { LPROC_LL_FLOCK,          LPROCFS_TYPE_REGS, "flock" },
-        { LPROC_LL_GETATTR,        LPROCFS_TYPE_REGS, "getattr" },
-        /* dir inode operation */
-        { LPROC_LL_CREATE,         LPROCFS_TYPE_REGS, "create" },
-        { LPROC_LL_LINK,           LPROCFS_TYPE_REGS, "link" },
-        { LPROC_LL_UNLINK,         LPROCFS_TYPE_REGS, "unlink" },
-        { LPROC_LL_SYMLINK,        LPROCFS_TYPE_REGS, "symlink" },
-        { LPROC_LL_MKDIR,          LPROCFS_TYPE_REGS, "mkdir" },
-        { LPROC_LL_RMDIR,          LPROCFS_TYPE_REGS, "rmdir" },
-        { LPROC_LL_MKNOD,          LPROCFS_TYPE_REGS, "mknod" },
-        { LPROC_LL_RENAME,         LPROCFS_TYPE_REGS, "rename" },
-        /* special inode operation */
-        { LPROC_LL_STAFS,          LPROCFS_TYPE_REGS, "statfs" },
-        { LPROC_LL_ALLOC_INODE,    LPROCFS_TYPE_REGS, "alloc_inode" },
-        { LPROC_LL_SETXATTR,       LPROCFS_TYPE_REGS, "setxattr" },
-        { LPROC_LL_GETXATTR,       LPROCFS_TYPE_REGS, "getxattr" },
-       { LPROC_LL_GETXATTR_HITS,  LPROCFS_TYPE_REGS, "getxattr_hits" },
-        { LPROC_LL_LISTXATTR,      LPROCFS_TYPE_REGS, "listxattr" },
-        { LPROC_LL_REMOVEXATTR,    LPROCFS_TYPE_REGS, "removexattr" },
-        { LPROC_LL_INODE_PERM,     LPROCFS_TYPE_REGS, "inode_permission" },
+       /* file operation */
+       { LPROC_LL_READ_BYTES,  LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
+               "read_bytes" },
+       { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
+               "write_bytes" },
+       { LPROC_LL_READ,        LPROCFS_TYPE_LATENCY,   "read" },
+       { LPROC_LL_WRITE,       LPROCFS_TYPE_LATENCY,   "write" },
+       { LPROC_LL_IOCTL,       LPROCFS_TYPE_REQS,      "ioctl" },
+       { LPROC_LL_OPEN,        LPROCFS_TYPE_LATENCY,   "open" },
+       { LPROC_LL_RELEASE,     LPROCFS_TYPE_LATENCY,   "close" },
+       { LPROC_LL_MMAP,        LPROCFS_TYPE_LATENCY,   "mmap" },
+       { LPROC_LL_FAULT,       LPROCFS_TYPE_LATENCY,   "page_fault" },
+       { LPROC_LL_MKWRITE,     LPROCFS_TYPE_LATENCY,   "page_mkwrite" },
+       { LPROC_LL_LLSEEK,      LPROCFS_TYPE_LATENCY,   "seek" },
+       { LPROC_LL_FSYNC,       LPROCFS_TYPE_LATENCY,   "fsync" },
+       { LPROC_LL_READDIR,     LPROCFS_TYPE_LATENCY,   "readdir" },
+       /* inode operation */
+       { LPROC_LL_SETATTR,     LPROCFS_TYPE_LATENCY,   "setattr" },
+       { LPROC_LL_TRUNC,       LPROCFS_TYPE_LATENCY,   "truncate" },
+       { LPROC_LL_FLOCK,       LPROCFS_TYPE_LATENCY,   "flock" },
+       { LPROC_LL_GETATTR,     LPROCFS_TYPE_LATENCY,   "getattr" },
+       /* dir inode operation */
+       { LPROC_LL_CREATE,      LPROCFS_TYPE_LATENCY,   "create" },
+       { LPROC_LL_LINK,        LPROCFS_TYPE_LATENCY,   "link" },
+       { LPROC_LL_UNLINK,      LPROCFS_TYPE_LATENCY,   "unlink" },
+       { LPROC_LL_SYMLINK,     LPROCFS_TYPE_LATENCY,   "symlink" },
+       { LPROC_LL_MKDIR,       LPROCFS_TYPE_LATENCY,   "mkdir" },
+       { LPROC_LL_RMDIR,       LPROCFS_TYPE_LATENCY,   "rmdir" },
+       { LPROC_LL_MKNOD,       LPROCFS_TYPE_LATENCY,   "mknod" },
+       { LPROC_LL_RENAME,      LPROCFS_TYPE_LATENCY,   "rename" },
+       /* special inode operation */
+       { LPROC_LL_STATFS,      LPROCFS_TYPE_LATENCY,   "statfs" },
+       { LPROC_LL_SETXATTR,    LPROCFS_TYPE_LATENCY,   "setxattr" },
+       { LPROC_LL_GETXATTR,    LPROCFS_TYPE_LATENCY,   "getxattr" },
+       { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REQS,    "getxattr_hits" },
+       { LPROC_LL_LISTXATTR,   LPROCFS_TYPE_LATENCY,   "listxattr" },
+       { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_LATENCY,   "removexattr" },
+       { LPROC_LL_INODE_PERM,  LPROCFS_TYPE_LATENCY,   "inode_permission" },
 };
 
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, long count)
 {
-        if (!sbi->ll_stats)
-                return;
-        if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
-        else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
-                 sbi->ll_stats_track_id == current->pid)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
-        else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
-                 sbi->ll_stats_track_id == current->parent->pid)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
+       if (!sbi->ll_stats)
+               return;
+
+       if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
+       else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
+                sbi->ll_stats_track_id == current->pid)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
+       else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
+                sbi->ll_stats_track_id == current->parent->pid)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
        else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
                 sbi->ll_stats_track_id ==
                        from_kgid(&init_user_ns, current_gid()))
@@ -1502,7 +1618,9 @@ static const char *ra_stat_string[] = {
        [RA_STAT_EOF] = "read-ahead to EOF",
        [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
        [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
-       [RA_STAT_FAILED_REACH_END] = "failed to reach end"
+       [RA_STAT_FAILED_REACH_END] = "failed to reach end",
+       [RA_STAT_ASYNC] = "async readahead",
+       [RA_STAT_FAILED_FAST_READ] = "failed to fast read",
 };
 
 int ll_debugfs_register_super(struct super_block *sb, const char *name)
@@ -1558,12 +1676,14 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
                u32 type = llite_opcode_table[id].type;
                void *ptr = NULL;
 
-               if (type & LPROCFS_TYPE_REGS)
-                       ptr = "regs";
+               if (type & LPROCFS_TYPE_REQS)
+                       ptr = "reqs";
                else if (type & LPROCFS_TYPE_BYTES)
                        ptr = "bytes";
                else if (type & LPROCFS_TYPE_PAGES)
                        ptr = "pages";
+               else if (type & LPROCFS_TYPE_USEC)
+                       ptr = "usec";
                lprocfs_counter_init(sbi->ll_stats,
                                     llite_opcode_table[id].opcode,
                                     (type & LPROCFS_CNTR_AVGMINMAX),
@@ -1839,15 +1959,15 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
                 lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
         }
 
-       for(i = 0; (count >= BIT(LL_HIST_START << i)) &&
-             (i < (LL_HIST_MAX - 1)); i++);
-        if (rw == 0) {
-                io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
-        } else {
-                io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
-        }
+       for (i = 0; (count >= BIT(LL_HIST_START + i)) &&
+            (i < (LL_HIST_MAX - 1)); i++);
+       if (rw == 0) {
+               io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+       } else {
+               io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+       }
        spin_unlock(&sbi->ll_pp_extent_lock);
 
        spin_lock(&sbi->ll_process_lock);
@@ -1933,7 +2053,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
                if (offset[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   offset[i].rw_op == READ ? 'R' : 'W',
                                   offset[i].rw_pid,
                                   offset[i].rw_range_start,
@@ -1947,7 +2067,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
                if (process[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   process[i].rw_op == READ ? 'R' : 'W',
                                   process[i].rw_pid,
                                   process[i].rw_range_start,