Whamcloud - gitweb
LU-12631 llite: report latency for filesystem ops
[fs/lustre-release.git] / lustre / llite / lproc_llite.c
index eeb3ad1..081316e 100644 (file)
 static struct kobject *llite_kobj;
 static struct dentry *llite_root;
 
+static void llite_kobj_release(struct kobject *kobj)
+{
+       if (!IS_ERR_OR_NULL(llite_root)) {
+               debugfs_remove(llite_root);
+               llite_root = NULL;
+       }
+
+       kfree(kobj);
+}
+
+static struct kobj_type llite_kobj_ktype = {
+       .release        = llite_kobj_release,
+       .sysfs_ops      = &lustre_sysfs_ops,
+};
+
 int llite_tunables_register(void)
 {
-       int rc = 0;
+       int rc;
 
-       llite_kobj = class_setup_tunables("llite");
-       if (IS_ERR(llite_kobj))
-               return PTR_ERR(llite_kobj);
+       llite_kobj = kzalloc(sizeof(*llite_kobj), GFP_KERNEL);
+       if (!llite_kobj)
+               return -ENOMEM;
+
+       llite_kobj->kset = lustre_kset;
+       rc = kobject_init_and_add(llite_kobj, &llite_kobj_ktype,
+                                 &lustre_kset->kobj, "%s", "llite");
+       if (rc)
+               goto free_kobj;
 
        llite_root = debugfs_create_dir("llite", debugfs_lustre_root);
        if (IS_ERR_OR_NULL(llite_root)) {
                rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM;
                llite_root = NULL;
+free_kobj:
                kobject_put(llite_kobj);
                llite_kobj = NULL;
        }
@@ -67,15 +89,8 @@ int llite_tunables_register(void)
 
 void llite_tunables_unregister(void)
 {
-       if (llite_kobj) {
-               kobject_put(llite_kobj);
-               llite_kobj = NULL;
-       }
-
-       if (!IS_ERR_OR_NULL(llite_root)) {
-               debugfs_remove(llite_root);
-               llite_root = NULL;
-       }
+       kobject_put(llite_kobj);
+       llite_kobj = NULL;
 }
 
 /* <debugfs>/lustre/llite mount point registration */
@@ -311,15 +326,14 @@ static int ll_max_readahead_mb_seq_show(struct seq_file *m, void *v)
 {
        struct super_block *sb = m->private;
        struct ll_sb_info *sbi = ll_s2sbi(sb);
-       long pages_number;
-       int mult;
+       unsigned long ra_max_mb;
 
        spin_lock(&sbi->ll_lock);
-       pages_number = sbi->ll_ra_info.ra_max_pages;
+       ra_max_mb = PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages);
        spin_unlock(&sbi->ll_lock);
 
-       mult = 1 << (20 - PAGE_SHIFT);
-       return lprocfs_seq_read_frac_helper(m, pages_number, mult);
+       seq_printf(m, "%lu\n", ra_max_mb);
+       return 0;
 }
 
 static ssize_t
@@ -329,21 +343,19 @@ ll_max_readahead_mb_seq_write(struct file *file, const char __user *buffer,
        struct seq_file *m = file->private_data;
        struct super_block *sb = m->private;
        struct ll_sb_info *sbi = ll_s2sbi(sb);
-       __s64 pages_number;
+       s64 ra_max_mb, pages_number;
        int rc;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &pages_number, 'M');
+       rc = lprocfs_str_with_units_to_s64(buffer, count, &ra_max_mb, 'M');
        if (rc)
                return rc;
 
-       pages_number >>= PAGE_SHIFT;
-
-       if (pages_number < 0 || pages_number > totalram_pages / 2) {
+       pages_number = round_up(ra_max_mb, 1024 * 1024) >> PAGE_SHIFT;
+       if (pages_number < 0 || pages_number > cfs_totalram_pages() / 2) {
                /* 1/2 of RAM */
-               CERROR("%s: can't set max_readahead_mb=%lu > %luMB\n",
-                      ll_get_fsname(sb, NULL, 0),
-                      (unsigned long)pages_number >> (20 - PAGE_SHIFT),
-                      totalram_pages >> (20 - PAGE_SHIFT + 1));
+               CERROR("%s: can't set max_readahead_mb=%llu > %luMB\n",
+                      sbi->ll_fsname, PAGES_TO_MiB(pages_number),
+                      PAGES_TO_MiB(cfs_totalram_pages()));
                return -ERANGE;
        }
 
@@ -360,15 +372,14 @@ static int ll_max_readahead_per_file_mb_seq_show(struct seq_file *m, void *v)
 {
        struct super_block *sb = m->private;
        struct ll_sb_info *sbi = ll_s2sbi(sb);
-       long pages_number;
-       int mult;
+       unsigned long ra_max_file_mb;
 
        spin_lock(&sbi->ll_lock);
-       pages_number = sbi->ll_ra_info.ra_max_pages_per_file;
+       ra_max_file_mb = PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages_per_file);
        spin_unlock(&sbi->ll_lock);
 
-       mult = 1 << (20 - PAGE_SHIFT);
-       return lprocfs_seq_read_frac_helper(m, pages_number, mult);
+       seq_printf(m, "%lu\n", ra_max_file_mb);
+       return 0;
 }
 
 static ssize_t
@@ -379,20 +390,19 @@ ll_max_readahead_per_file_mb_seq_write(struct file *file,
        struct seq_file *m = file->private_data;
        struct super_block *sb = m->private;
        struct ll_sb_info *sbi = ll_s2sbi(sb);
+       s64 ra_max_file_mb, pages_number;
        int rc;
-       __s64 pages_number;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &pages_number, 'M');
+       rc = lprocfs_str_with_units_to_s64(buffer, count, &ra_max_file_mb,
+                                          'M');
        if (rc)
                return rc;
 
-       pages_number >>= PAGE_SHIFT;
-
+       pages_number = round_up(ra_max_file_mb, 1024 * 1024) >> PAGE_SHIFT;
        if (pages_number < 0 || pages_number > sbi->ll_ra_info.ra_max_pages) {
-               CERROR("%s: can't set max_readahead_per_file_mb=%lu > "
-                      "max_read_ahead_mb=%lu\n", ll_get_fsname(sb, NULL, 0),
-                      (unsigned long)pages_number >> (20 - PAGE_SHIFT),
-                      sbi->ll_ra_info.ra_max_pages >> (20 - PAGE_SHIFT));
+               CERROR("%s: can't set max_readahead_per_file_mb=%llu > max_read_ahead_mb=%lu\n",
+                      sbi->ll_fsname, PAGES_TO_MiB(pages_number),
+                      PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages));
                return -ERANGE;
        }
 
@@ -409,15 +419,14 @@ static int ll_max_read_ahead_whole_mb_seq_show(struct seq_file *m, void *v)
 {
        struct super_block *sb = m->private;
        struct ll_sb_info *sbi = ll_s2sbi(sb);
-       long pages_number;
-       int mult;
+       unsigned long ra_max_whole_mb;
 
        spin_lock(&sbi->ll_lock);
-       pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
+       ra_max_whole_mb = PAGES_TO_MiB(sbi->ll_ra_info.ra_max_read_ahead_whole_pages);
        spin_unlock(&sbi->ll_lock);
 
-       mult = 1 << (20 - PAGE_SHIFT);
-       return lprocfs_seq_read_frac_helper(m, pages_number, mult);
+       seq_printf(m, "%lu\n", ra_max_whole_mb);
+       return 0;
 }
 
 static ssize_t
@@ -428,25 +437,23 @@ ll_max_read_ahead_whole_mb_seq_write(struct file *file,
        struct seq_file *m = file->private_data;
        struct super_block *sb = m->private;
        struct ll_sb_info *sbi = ll_s2sbi(sb);
+       s64 ra_max_whole_mb, pages_number;
        int rc;
-       __s64 pages_number;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &pages_number, 'M');
+       rc = lprocfs_str_with_units_to_s64(buffer, count, &ra_max_whole_mb,
+                                          'M');
        if (rc)
                return rc;
 
-       pages_number >>= PAGE_SHIFT;
-
+       pages_number = round_up(ra_max_whole_mb, 1024 * 1024) >> PAGE_SHIFT;
        /* Cap this at the current max readahead window size, the readahead
-        * algorithm does this anyway so it's pointless to set it larger. */
+        * algorithm does this anyway so it's pointless to set it larger.
+        */
        if (pages_number < 0 ||
            pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
-               int pages_shift = 20 - PAGE_SHIFT;
-               CERROR("%s: can't set max_read_ahead_whole_mb=%lu > "
-                      "max_read_ahead_per_file_mb=%lu\n",
-                      ll_get_fsname(sb, NULL, 0),
-                      (unsigned long)pages_number >> pages_shift,
-                      sbi->ll_ra_info.ra_max_pages_per_file >> pages_shift);
+               CERROR("%s: can't set max_read_ahead_whole_mb=%llu > max_read_ahead_per_file_mb=%lu\n",
+                      sbi->ll_fsname, PAGES_TO_MiB(pages_number),
+                      PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages_per_file));
                return -ERANGE;
        }
 
@@ -464,12 +471,11 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
        struct super_block     *sb    = m->private;
        struct ll_sb_info      *sbi   = ll_s2sbi(sb);
        struct cl_client_cache *cache = sbi->ll_cache;
-       int shift = 20 - PAGE_SHIFT;
        long max_cached_mb;
        long unused_mb;
 
-       max_cached_mb = cache->ccc_lru_max >> shift;
-       unused_mb = atomic_long_read(&cache->ccc_lru_left) >> shift;
+       max_cached_mb = PAGES_TO_MiB(cache->ccc_lru_max);
+       unused_mb = PAGES_TO_MiB(atomic_long_read(&cache->ccc_lru_left));
        seq_printf(m, "users: %d\n"
                      "max_cached_mb: %ld\n"
                      "used_mb: %ld\n"
@@ -515,10 +521,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
        pages_number >>= PAGE_SHIFT;
 
-       if (pages_number < 0 || pages_number > totalram_pages) {
+       if (pages_number < 0 || pages_number > cfs_totalram_pages()) {
                CERROR("%s: can't set max cache more than %lu MB\n",
-                      ll_get_fsname(sb, NULL, 0),
-                      totalram_pages >> (20 - PAGE_SHIFT));
+                      sbi->ll_fsname,
+                      PAGES_TO_MiB(cfs_totalram_pages()));
                RETURN(-ERANGE);
        }
        /* Allow enough cache so clients can make well-formed RPCs */
@@ -869,6 +875,36 @@ static ssize_t lazystatfs_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(lazystatfs);
 
+static ssize_t statfs_max_age_show(struct kobject *kobj, struct attribute *attr,
+                                  char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_statfs_max_age);
+}
+
+static ssize_t statfs_max_age_store(struct kobject *kobj,
+                                   struct attribute *attr, const char *buffer,
+                                   size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned int val;
+       int rc;
+
+       rc = kstrtouint(buffer, 10, &val);
+       if (rc)
+               return rc;
+       if (val > OBD_STATFS_CACHE_MAX_AGE)
+               return -EINVAL;
+
+       sbi->ll_statfs_max_age = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(statfs_max_age);
+
 static ssize_t max_easize_show(struct kobject *kobj,
                               struct attribute *attr,
                               char *buf)
@@ -882,7 +918,9 @@ static ssize_t max_easize_show(struct kobject *kobj,
        if (rc)
                return rc;
 
-       return sprintf(buf, "%u\n", ealen);
+       /* Limit xattr size returned to userspace based on kernel maximum */
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       ealen > XATTR_SIZE_MAX ? XATTR_SIZE_MAX : ealen);
 }
 LUSTRE_RO_ATTR(max_easize);
 
@@ -910,7 +948,9 @@ static ssize_t default_easize_show(struct kobject *kobj,
        if (rc)
                return rc;
 
-       return sprintf(buf, "%u\n", ealen);
+       /* Limit xattr size returned to userspace based on kernel maximum */
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       ealen > XATTR_SIZE_MAX ? XATTR_SIZE_MAX : ealen);
 }
 
 /**
@@ -964,7 +1004,7 @@ static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
        while (flags != 0) {
                if (ARRAY_SIZE(str) <= i) {
                        CERROR("%s: Revise array LL_SBI_FLAGS to match sbi "
-                               "flags please.\n", ll_get_fsname(sb, NULL, 0));
+                               "flags please.\n", ll_s2sbi(sb)->ll_fsname);
                        return -EINVAL;
                }
 
@@ -1048,6 +1088,87 @@ static ssize_t tiny_write_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(tiny_write);
 
+static ssize_t max_read_ahead_async_active_show(struct kobject *kobj,
+                                              struct attribute *attr,
+                                              char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       sbi->ll_ra_info.ra_async_max_active);
+}
+
+static ssize_t max_read_ahead_async_active_store(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               const char *buffer,
+                                               size_t count)
+{
+       unsigned int val;
+       int rc;
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       rc = kstrtouint(buffer, 10, &val);
+       if (rc)
+               return rc;
+
+       if (val < 1 || val > WQ_UNBOUND_MAX_ACTIVE) {
+               CERROR("%s: cannot set max_read_ahead_async_active=%u %s than %u\n",
+                      sbi->ll_fsname, val,
+                      val < 1 ? "smaller" : "larger",
+                      val < 1 ? 1 : WQ_UNBOUND_MAX_ACTIVE);
+               return -ERANGE;
+       }
+
+       sbi->ll_ra_info.ra_async_max_active = val;
+       workqueue_set_max_active(sbi->ll_ra_info.ll_readahead_wq, val);
+
+       return count;
+}
+LUSTRE_RW_ATTR(max_read_ahead_async_active);
+
+static ssize_t read_ahead_async_file_threshold_mb_show(struct kobject *kobj,
+                                                      struct attribute *attr,
+                                                      char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%lu\n",
+            PAGES_TO_MiB(sbi->ll_ra_info.ra_async_pages_per_file_threshold));
+}
+
+static ssize_t
+read_ahead_async_file_threshold_mb_store(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        const char *buffer, size_t count)
+{
+       unsigned long pages_number;
+       unsigned long max_ra_per_file;
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       int rc;
+
+       rc = kstrtoul(buffer, 10, &pages_number);
+       if (rc)
+               return rc;
+
+       pages_number = MiB_TO_PAGES(pages_number);
+       max_ra_per_file = sbi->ll_ra_info.ra_max_pages_per_file;
+       if (pages_number < 0 || pages_number > max_ra_per_file) {
+               CERROR("%s: can't set read_ahead_async_file_threshold_mb=%lu > "
+                      "max_read_readahead_per_file_mb=%lu\n", sbi->ll_fsname,
+                      PAGES_TO_MiB(pages_number),
+                      PAGES_TO_MiB(max_ra_per_file));
+               return -ERANGE;
+       }
+       sbi->ll_ra_info.ra_async_pages_per_file_threshold = pages_number;
+
+       return count;
+}
+LUSTRE_RW_ATTR(read_ahead_async_file_threshold_mb);
+
 static ssize_t fast_read_show(struct kobject *kobj,
                              struct attribute *attr,
                              char *buf)
@@ -1083,20 +1204,21 @@ static ssize_t fast_read_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(fast_read);
 
-static ssize_t pio_show(struct kobject *kobj,
-                       struct attribute *attr,
-                       char *buf)
+static ssize_t file_heat_show(struct kobject *kobj,
+                             struct attribute *attr,
+                             char *buf)
 {
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return sprintf(buf, "%u\n", !!(sbi->ll_flags & LL_SBI_PIO));
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                       !!(sbi->ll_flags & LL_SBI_FILE_HEAT));
 }
 
-static ssize_t pio_store(struct kobject *kobj,
-                        struct attribute *attr,
-                        const char *buffer,
-                        size_t count)
+static ssize_t file_heat_store(struct kobject *kobj,
+                              struct attribute *attr,
+                              const char *buffer,
+                              size_t count)
 {
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
@@ -1109,14 +1231,81 @@ static ssize_t pio_store(struct kobject *kobj,
 
        spin_lock(&sbi->ll_lock);
        if (val)
-               sbi->ll_flags |= LL_SBI_PIO;
+               sbi->ll_flags |= LL_SBI_FILE_HEAT;
        else
-               sbi->ll_flags &= ~LL_SBI_PIO;
+               sbi->ll_flags &= ~LL_SBI_FILE_HEAT;
        spin_unlock(&sbi->ll_lock);
 
        return count;
 }
-LUSTRE_RW_ATTR(pio);
+LUSTRE_RW_ATTR(file_heat);
+
+static ssize_t heat_decay_percentage_show(struct kobject *kobj,
+                                         struct attribute *attr,
+                                         char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n",
+                      (sbi->ll_heat_decay_weight * 100 + 128) / 256);
+}
+
+static ssize_t heat_decay_percentage_store(struct kobject *kobj,
+                                          struct attribute *attr,
+                                          const char *buffer,
+                                          size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned long val;
+       int rc;
+
+       rc = kstrtoul(buffer, 10, &val);
+       if (rc)
+               return rc;
+
+       if (val < 0 || val > 100)
+               return -ERANGE;
+
+       sbi->ll_heat_decay_weight = (val * 256 + 50) / 100;
+
+       return count;
+}
+LUSTRE_RW_ATTR(heat_decay_percentage);
+
+static ssize_t heat_period_second_show(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_heat_period_second);
+}
+
+static ssize_t heat_period_second_store(struct kobject *kobj,
+                                       struct attribute *attr,
+                                       const char *buffer,
+                                       size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned long val;
+       int rc;
+
+       rc = kstrtoul(buffer, 10, &val);
+       if (rc)
+               return rc;
+
+       if (val <= 0)
+               return -ERANGE;
+
+       sbi->ll_heat_period_second = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(heat_period_second);
 
 static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
 {
@@ -1190,8 +1379,7 @@ static ssize_t ll_root_squash_seq_write(struct file *file,
        struct ll_sb_info *sbi = ll_s2sbi(sb);
        struct root_squash_info *squash = &sbi->ll_squash;
 
-       return lprocfs_wr_root_squash(buffer, count, squash,
-                                     ll_get_fsname(sb, NULL, 0));
+       return lprocfs_wr_root_squash(buffer, count, squash, sbi->ll_fsname);
 }
 
 LDEBUGFS_SEQ_FOPS(ll_root_squash);
@@ -1203,7 +1391,7 @@ static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
        struct root_squash_info *squash = &sbi->ll_squash;
        int len;
 
-       down_read(&squash->rsi_sem);
+       spin_lock(&squash->rsi_lock);
        if (!list_empty(&squash->rsi_nosquash_nids)) {
                len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
                                        &squash->rsi_nosquash_nids);
@@ -1212,7 +1400,7 @@ static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
        } else {
                seq_puts(m, "NONE\n");
        }
-       up_read(&squash->rsi_sem);
+       spin_unlock(&squash->rsi_lock);
 
        return 0;
 }
@@ -1227,8 +1415,7 @@ static ssize_t ll_nosquash_nids_seq_write(struct file *file,
        struct root_squash_info *squash = &sbi->ll_squash;
        int rc;
 
-       rc = lprocfs_wr_nosquash_nids(buffer, count, squash,
-                                     ll_get_fsname(sb, NULL, 0));
+       rc = lprocfs_wr_nosquash_nids(buffer, count, squash, sbi->ll_fsname);
        if (rc < 0)
                return rc;
 
@@ -1239,6 +1426,43 @@ static ssize_t ll_nosquash_nids_seq_write(struct file *file,
 
 LDEBUGFS_SEQ_FOPS(ll_nosquash_nids);
 
+static int ll_pcc_seq_show(struct seq_file *m, void *v)
+{
+       struct super_block *sb = m->private;
+       struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+       return pcc_super_dump(&sbi->ll_pcc_super, m);
+}
+
+static ssize_t ll_pcc_seq_write(struct file *file, const char __user *buffer,
+                               size_t count, loff_t *off)
+{
+       struct seq_file *m = file->private_data;
+       struct super_block *sb = m->private;
+       struct ll_sb_info *sbi = ll_s2sbi(sb);
+       int rc;
+       char *kernbuf;
+
+       if (count >= LPROCFS_WR_PCC_MAX_CMD)
+               return -EINVAL;
+
+       if (!(exp_connect_flags2(sbi->ll_md_exp) & OBD_CONNECT2_PCC))
+               return -EOPNOTSUPP;
+
+       OBD_ALLOC(kernbuf, count + 1);
+       if (kernbuf == NULL)
+               return -ENOMEM;
+
+       if (copy_from_user(kernbuf, buffer, count))
+               GOTO(out_free_kernbuff, rc = -EFAULT);
+
+       rc = pcc_cmd_handle(kernbuf, count, &sbi->ll_pcc_super);
+out_free_kernbuff:
+       OBD_FREE(kernbuf, count + 1);
+       return rc ? rc : count;
+}
+LPROC_SEQ_FOPS(ll_pcc);
+
 struct lprocfs_vars lprocfs_llite_obd_vars[] = {
        { .name =       "site",
          .fops =       &ll_site_stats_fops                     },
@@ -1260,6 +1484,8 @@ struct lprocfs_vars lprocfs_llite_obd_vars[] = {
          .fops =       &ll_root_squash_fops                    },
        { .name =       "nosquash_nids",
          .fops =       &ll_nosquash_nids_fops                  },
+       { .name =       "pcc",
+         .fops =       &ll_pcc_fops,                           },
        { NULL }
 };
 
@@ -1285,90 +1511,93 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_statahead_max.attr,
        &lustre_attr_statahead_agl.attr,
        &lustre_attr_lazystatfs.attr,
+       &lustre_attr_statfs_max_age.attr,
        &lustre_attr_max_easize.attr,
        &lustre_attr_default_easize.attr,
        &lustre_attr_xattr_cache.attr,
        &lustre_attr_fast_read.attr,
-       &lustre_attr_pio.attr,
        &lustre_attr_tiny_write.attr,
+       &lustre_attr_file_heat.attr,
+       &lustre_attr_heat_decay_percentage.attr,
+       &lustre_attr_heat_period_second.attr,
+       &lustre_attr_max_read_ahead_async_active.attr,
+       &lustre_attr_read_ahead_async_file_threshold_mb.attr,
        NULL,
 };
 
-static void llite_kobj_release(struct kobject *kobj)
+static void sbi_kobj_release(struct kobject *kobj)
 {
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
        complete(&sbi->ll_kobj_unregister);
 }
 
-static struct kobj_type llite_ktype = {
+static struct kobj_type sbi_ktype = {
        .default_attrs  = llite_attrs,
        .sysfs_ops      = &lustre_sysfs_ops,
-       .release        = llite_kobj_release,
+       .release        = sbi_kobj_release,
 };
 
+#define LPROCFS_TYPE_LATENCY \
+       (LPROCFS_TYPE_USEC | LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV)
 static const struct llite_file_opcode {
-        __u32       opcode;
-        __u32       type;
-        const char *opname;
+       __u32           opcode;
+       __u32           type;
+       const char      *opname;
 } llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
-        /* file operation */
-        { LPROC_LL_DIRTY_HITS,     LPROCFS_TYPE_REGS, "dirty_pages_hits" },
-        { LPROC_LL_DIRTY_MISSES,   LPROCFS_TYPE_REGS, "dirty_pages_misses" },
-        { LPROC_LL_READ_BYTES,     LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
-                                   "read_bytes" },
-        { LPROC_LL_WRITE_BYTES,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
-                                   "write_bytes" },
-        { LPROC_LL_BRW_READ,       LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
-                                   "brw_read" },
-        { LPROC_LL_BRW_WRITE,      LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
-                                   "brw_write" },
-        { LPROC_LL_IOCTL,          LPROCFS_TYPE_REGS, "ioctl" },
-        { LPROC_LL_OPEN,           LPROCFS_TYPE_REGS, "open" },
-        { LPROC_LL_RELEASE,        LPROCFS_TYPE_REGS, "close" },
-        { LPROC_LL_MAP,            LPROCFS_TYPE_REGS, "mmap" },
-       { LPROC_LL_FAULT,          LPROCFS_TYPE_REGS, "page_fault" },
-       { LPROC_LL_MKWRITE,        LPROCFS_TYPE_REGS, "page_mkwrite" },
-        { LPROC_LL_LLSEEK,         LPROCFS_TYPE_REGS, "seek" },
-        { LPROC_LL_FSYNC,          LPROCFS_TYPE_REGS, "fsync" },
-        { LPROC_LL_READDIR,        LPROCFS_TYPE_REGS, "readdir" },
-        /* inode operation */
-        { LPROC_LL_SETATTR,        LPROCFS_TYPE_REGS, "setattr" },
-        { LPROC_LL_TRUNC,          LPROCFS_TYPE_REGS, "truncate" },
-        { LPROC_LL_FLOCK,          LPROCFS_TYPE_REGS, "flock" },
-        { LPROC_LL_GETATTR,        LPROCFS_TYPE_REGS, "getattr" },
-        /* dir inode operation */
-        { LPROC_LL_CREATE,         LPROCFS_TYPE_REGS, "create" },
-        { LPROC_LL_LINK,           LPROCFS_TYPE_REGS, "link" },
-        { LPROC_LL_UNLINK,         LPROCFS_TYPE_REGS, "unlink" },
-        { LPROC_LL_SYMLINK,        LPROCFS_TYPE_REGS, "symlink" },
-        { LPROC_LL_MKDIR,          LPROCFS_TYPE_REGS, "mkdir" },
-        { LPROC_LL_RMDIR,          LPROCFS_TYPE_REGS, "rmdir" },
-        { LPROC_LL_MKNOD,          LPROCFS_TYPE_REGS, "mknod" },
-        { LPROC_LL_RENAME,         LPROCFS_TYPE_REGS, "rename" },
-        /* special inode operation */
-        { LPROC_LL_STAFS,          LPROCFS_TYPE_REGS, "statfs" },
-        { LPROC_LL_ALLOC_INODE,    LPROCFS_TYPE_REGS, "alloc_inode" },
-        { LPROC_LL_SETXATTR,       LPROCFS_TYPE_REGS, "setxattr" },
-        { LPROC_LL_GETXATTR,       LPROCFS_TYPE_REGS, "getxattr" },
-       { LPROC_LL_GETXATTR_HITS,  LPROCFS_TYPE_REGS, "getxattr_hits" },
-        { LPROC_LL_LISTXATTR,      LPROCFS_TYPE_REGS, "listxattr" },
-        { LPROC_LL_REMOVEXATTR,    LPROCFS_TYPE_REGS, "removexattr" },
-        { LPROC_LL_INODE_PERM,     LPROCFS_TYPE_REGS, "inode_permission" },
+       /* file operation */
+       { LPROC_LL_READ_BYTES,  LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
+               "read_bytes" },
+       { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
+               "write_bytes" },
+       { LPROC_LL_READ,        LPROCFS_TYPE_LATENCY,   "read" },
+       { LPROC_LL_WRITE,       LPROCFS_TYPE_LATENCY,   "write" },
+       { LPROC_LL_IOCTL,       LPROCFS_TYPE_REQS,      "ioctl" },
+       { LPROC_LL_OPEN,        LPROCFS_TYPE_LATENCY,   "open" },
+       { LPROC_LL_RELEASE,     LPROCFS_TYPE_LATENCY,   "close" },
+       { LPROC_LL_MMAP,        LPROCFS_TYPE_LATENCY,   "mmap" },
+       { LPROC_LL_FAULT,       LPROCFS_TYPE_LATENCY,   "page_fault" },
+       { LPROC_LL_MKWRITE,     LPROCFS_TYPE_LATENCY,   "page_mkwrite" },
+       { LPROC_LL_LLSEEK,      LPROCFS_TYPE_LATENCY,   "seek" },
+       { LPROC_LL_FSYNC,       LPROCFS_TYPE_LATENCY,   "fsync" },
+       { LPROC_LL_READDIR,     LPROCFS_TYPE_LATENCY,   "readdir" },
+       /* inode operation */
+       { LPROC_LL_SETATTR,     LPROCFS_TYPE_LATENCY,   "setattr" },
+       { LPROC_LL_TRUNC,       LPROCFS_TYPE_LATENCY,   "truncate" },
+       { LPROC_LL_FLOCK,       LPROCFS_TYPE_LATENCY,   "flock" },
+       { LPROC_LL_GETATTR,     LPROCFS_TYPE_LATENCY,   "getattr" },
+       /* dir inode operation */
+       { LPROC_LL_CREATE,      LPROCFS_TYPE_LATENCY,   "create" },
+       { LPROC_LL_LINK,        LPROCFS_TYPE_LATENCY,   "link" },
+       { LPROC_LL_UNLINK,      LPROCFS_TYPE_LATENCY,   "unlink" },
+       { LPROC_LL_SYMLINK,     LPROCFS_TYPE_LATENCY,   "symlink" },
+       { LPROC_LL_MKDIR,       LPROCFS_TYPE_LATENCY,   "mkdir" },
+       { LPROC_LL_RMDIR,       LPROCFS_TYPE_LATENCY,   "rmdir" },
+       { LPROC_LL_MKNOD,       LPROCFS_TYPE_LATENCY,   "mknod" },
+       { LPROC_LL_RENAME,      LPROCFS_TYPE_LATENCY,   "rename" },
+       /* special inode operation */
+       { LPROC_LL_STATFS,      LPROCFS_TYPE_LATENCY,   "statfs" },
+       { LPROC_LL_SETXATTR,    LPROCFS_TYPE_LATENCY,   "setxattr" },
+       { LPROC_LL_GETXATTR,    LPROCFS_TYPE_LATENCY,   "getxattr" },
+       { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REQS,    "getxattr_hits" },
+       { LPROC_LL_LISTXATTR,   LPROCFS_TYPE_LATENCY,   "listxattr" },
+       { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_LATENCY,   "removexattr" },
+       { LPROC_LL_INODE_PERM,  LPROCFS_TYPE_LATENCY,   "inode_permission" },
 };
 
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, long count)
 {
-        if (!sbi->ll_stats)
-                return;
-        if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
-        else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
-                 sbi->ll_stats_track_id == current->pid)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
-        else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
-                 sbi->ll_stats_track_id == current->parent->pid)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
+       if (!sbi->ll_stats)
+               return;
+
+       if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
+       else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
+                sbi->ll_stats_track_id == current->pid)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
+       else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
+                sbi->ll_stats_track_id == current->parent->pid)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
        else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
                 sbi->ll_stats_track_id ==
                        from_kgid(&init_user_ns, current_gid()))
@@ -1389,7 +1618,9 @@ static const char *ra_stat_string[] = {
        [RA_STAT_EOF] = "read-ahead to EOF",
        [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
        [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
-       [RA_STAT_FAILED_REACH_END] = "failed to reach end"
+       [RA_STAT_FAILED_REACH_END] = "failed to reach end",
+       [RA_STAT_ASYNC] = "async readahead",
+       [RA_STAT_FAILED_FAST_READ] = "failed to fast read",
 };
 
 int ll_debugfs_register_super(struct super_block *sb, const char *name)
@@ -1445,12 +1676,14 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
                u32 type = llite_opcode_table[id].type;
                void *ptr = NULL;
 
-               if (type & LPROCFS_TYPE_REGS)
-                       ptr = "regs";
+               if (type & LPROCFS_TYPE_REQS)
+                       ptr = "reqs";
                else if (type & LPROCFS_TYPE_BYTES)
                        ptr = "bytes";
                else if (type & LPROCFS_TYPE_PAGES)
                        ptr = "pages";
+               else if (type & LPROCFS_TYPE_USEC)
+                       ptr = "usec";
                lprocfs_counter_init(sbi->ll_stats,
                                     llite_opcode_table[id].opcode,
                                     (type & LPROCFS_CNTR_AVGMINMAX),
@@ -1479,7 +1712,7 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
 out_ll_kset:
        /* Yes we also register sysfs mount kset here as well */
        sbi->ll_kset.kobj.parent = llite_kobj;
-       sbi->ll_kset.kobj.ktype = &llite_ktype;
+       sbi->ll_kset.kobj.ktype = &sbi_ktype;
        init_completion(&sbi->ll_kobj_unregister);
        err = kobject_set_name(&sbi->ll_kset.kobj, "%s", name);
        if (err)
@@ -1726,15 +1959,15 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
                 lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
         }
 
-       for(i = 0; (count >= BIT(LL_HIST_START << i)) &&
-             (i < (LL_HIST_MAX - 1)); i++);
-        if (rw == 0) {
-                io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
-        } else {
-                io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
-        }
+       for (i = 0; (count >= BIT(LL_HIST_START + i)) &&
+            (i < (LL_HIST_MAX - 1)); i++);
+       if (rw == 0) {
+               io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+       } else {
+               io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+       }
        spin_unlock(&sbi->ll_pp_extent_lock);
 
        spin_lock(&sbi->ll_process_lock);
@@ -1820,7 +2053,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
                if (offset[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   offset[i].rw_op == READ ? 'R' : 'W',
                                   offset[i].rw_pid,
                                   offset[i].rw_range_start,
@@ -1834,7 +2067,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
                if (process[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   process[i].rw_op == READ ? 'R' : 'W',
                                   process[i].rw_pid,
                                   process[i].rw_range_start,