Whamcloud - gitweb
LU-12142 readahead: limit over reservation
[fs/lustre-release.git] / lustre / llite / lproc_llite.c
index bfb6949..23b67d4 100644 (file)
@@ -33,9 +33,8 @@
 
 #include <linux/version.h>
 #include <linux/user_namespace.h>
-#ifdef HAVE_UIDGID_HEADER
-# include <linux/uidgid.h>
-#endif
+#include <linux/uidgid.h>
+
 #include <uapi/linux/lustre/lustre_param.h>
 #include <lprocfs_status.h>
 #include <obd_support.h>
@@ -76,13 +75,11 @@ int llite_tunables_register(void)
                goto free_kobj;
 
        llite_root = debugfs_create_dir("llite", debugfs_lustre_root);
-       if (IS_ERR_OR_NULL(llite_root)) {
-               rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM;
-               llite_root = NULL;
+       return 0;
+
 free_kobj:
-               kobject_put(llite_kobj);
-               llite_kobj = NULL;
-       }
+       kobject_put(llite_kobj);
+       llite_kobj = NULL;
 
        return rc;
 }
@@ -292,6 +289,14 @@ static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr,
 }
 LUSTRE_RO_ATTR(client_type);
 
+LUSTRE_RW_ATTR(foreign_symlink_enable);
+
+LUSTRE_RW_ATTR(foreign_symlink_prefix);
+
+LUSTRE_RW_ATTR(foreign_symlink_upcall);
+
+LUSTRE_WO_ATTR(foreign_symlink_upcall_info);
+
 static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
                           char *buf)
 {
@@ -322,40 +327,37 @@ static int ll_site_stats_seq_show(struct seq_file *m, void *v)
 
 LDEBUGFS_SEQ_FOPS_RO(ll_site_stats);
 
-static int ll_max_readahead_mb_seq_show(struct seq_file *m, void *v)
+static ssize_t max_read_ahead_mb_show(struct kobject *kobj,
+                                     struct attribute *attr, char *buf)
 {
-       struct super_block *sb = m->private;
-       struct ll_sb_info *sbi = ll_s2sbi(sb);
-       unsigned long ra_max_mb;
-
-       spin_lock(&sbi->ll_lock);
-       ra_max_mb = PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages);
-       spin_unlock(&sbi->ll_lock);
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
 
-       seq_printf(m, "%lu\n", ra_max_mb);
-       return 0;
+       return scnprintf(buf, PAGE_SIZE, "%lu\n",
+                       PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages));
 }
 
-static ssize_t
-ll_max_readahead_mb_seq_write(struct file *file, const char __user *buffer,
-                             size_t count, loff_t *off)
+static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      const char *buffer, size_t count)
 {
-       struct seq_file *m = file->private_data;
-       struct super_block *sb = m->private;
-       struct ll_sb_info *sbi = ll_s2sbi(sb);
-       s64 ra_max_mb, pages_number;
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       u64 ra_max_mb, pages_number;
        int rc;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &ra_max_mb, 'M');
+       rc = sysfs_memparse(buffer, count, &ra_max_mb, "MiB");
        if (rc)
                return rc;
 
        pages_number = round_up(ra_max_mb, 1024 * 1024) >> PAGE_SHIFT;
-       if (pages_number < 0 || pages_number > cfs_totalram_pages() / 2) {
+       CDEBUG(D_INFO, "%s: set max_read_ahead_mb=%llu (%llu pages)\n",
+              sbi->ll_fsname, PAGES_TO_MiB(pages_number), pages_number);
+       if (pages_number > cfs_totalram_pages() / 2) {
                /* 1/2 of RAM */
-               CERROR("%s: can't set max_readahead_mb=%llu > %luMB\n",
+               CERROR("%s: cannot set max_read_ahead_mb=%llu > totalram/2=%luMB\n",
                       sbi->ll_fsname, PAGES_TO_MiB(pages_number),
-                      PAGES_TO_MiB(cfs_totalram_pages()));
+                      PAGES_TO_MiB(cfs_totalram_pages() / 2));
                return -ERANGE;
        }
 
@@ -365,42 +367,36 @@ ll_max_readahead_mb_seq_write(struct file *file, const char __user *buffer,
 
        return count;
 }
+LUSTRE_RW_ATTR(max_read_ahead_mb);
 
-LDEBUGFS_SEQ_FOPS(ll_max_readahead_mb);
-
-static int ll_max_readahead_per_file_mb_seq_show(struct seq_file *m, void *v)
+static ssize_t max_read_ahead_per_file_mb_show(struct kobject *kobj,
+                                              struct attribute *attr,
+                                              char *buf)
 {
-       struct super_block *sb = m->private;
-       struct ll_sb_info *sbi = ll_s2sbi(sb);
-       unsigned long ra_max_file_mb;
-
-       spin_lock(&sbi->ll_lock);
-       ra_max_file_mb = PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages_per_file);
-       spin_unlock(&sbi->ll_lock);
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
 
-       seq_printf(m, "%lu\n", ra_max_file_mb);
-       return 0;
+       return scnprintf(buf, PAGE_SIZE, "%lu\n",
+                        PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages_per_file));
 }
 
-static ssize_t
-ll_max_readahead_per_file_mb_seq_write(struct file *file,
-                                      const char __user *buffer,
-                                      size_t count, loff_t *off)
+static ssize_t max_read_ahead_per_file_mb_store(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               const char *buffer,
+                                               size_t count)
 {
-       struct seq_file *m = file->private_data;
-       struct super_block *sb = m->private;
-       struct ll_sb_info *sbi = ll_s2sbi(sb);
-       s64 ra_max_file_mb, pages_number;
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       u64 ra_max_file_mb, pages_number;
        int rc;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &ra_max_file_mb,
-                                          'M');
+       rc = sysfs_memparse(buffer, count, &ra_max_file_mb, "MiB");
        if (rc)
                return rc;
 
        pages_number = round_up(ra_max_file_mb, 1024 * 1024) >> PAGE_SHIFT;
-       if (pages_number < 0 || pages_number > sbi->ll_ra_info.ra_max_pages) {
-               CERROR("%s: can't set max_readahead_per_file_mb=%llu > max_read_ahead_mb=%lu\n",
+       if (pages_number > sbi->ll_ra_info.ra_max_pages) {
+               CERROR("%s: cannot set max_read_ahead_per_file_mb=%llu > max_read_ahead_mb=%lu\n",
                       sbi->ll_fsname, PAGES_TO_MiB(pages_number),
                       PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages));
                return -ERANGE;
@@ -412,36 +408,28 @@ ll_max_readahead_per_file_mb_seq_write(struct file *file,
 
        return count;
 }
+LUSTRE_RW_ATTR(max_read_ahead_per_file_mb);
 
-LDEBUGFS_SEQ_FOPS(ll_max_readahead_per_file_mb);
-
-static int ll_max_read_ahead_whole_mb_seq_show(struct seq_file *m, void *v)
+static ssize_t max_read_ahead_whole_mb_show(struct kobject *kobj,
+                                           struct attribute *attr, char *buf)
 {
-       struct super_block *sb = m->private;
-       struct ll_sb_info *sbi = ll_s2sbi(sb);
-       unsigned long ra_max_whole_mb;
-
-       spin_lock(&sbi->ll_lock);
-       ra_max_whole_mb = PAGES_TO_MiB(sbi->ll_ra_info.ra_max_read_ahead_whole_pages);
-       spin_unlock(&sbi->ll_lock);
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
 
-       seq_printf(m, "%lu\n", ra_max_whole_mb);
-       return 0;
+       return scnprintf(buf, PAGE_SIZE, "%lu\n",
+                        PAGES_TO_MiB(sbi->ll_ra_info.ra_max_read_ahead_whole_pages));
 }
 
-static ssize_t
-ll_max_read_ahead_whole_mb_seq_write(struct file *file,
-                                    const char __user *buffer,
-                                    size_t count, loff_t *off)
+static ssize_t max_read_ahead_whole_mb_store(struct kobject *kobj,
+                                            struct attribute *attr,
+                                            const char *buffer, size_t count)
 {
-       struct seq_file *m = file->private_data;
-       struct super_block *sb = m->private;
-       struct ll_sb_info *sbi = ll_s2sbi(sb);
-       s64 ra_max_whole_mb, pages_number;
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       u64 ra_max_whole_mb, pages_number;
        int rc;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &ra_max_whole_mb,
-                                          'M');
+       rc = sysfs_memparse(buffer, count, &ra_max_whole_mb, "MiB");
        if (rc)
                return rc;
 
@@ -449,11 +437,11 @@ ll_max_read_ahead_whole_mb_seq_write(struct file *file,
        /* Cap this at the current max readahead window size, the readahead
         * algorithm does this anyway so it's pointless to set it larger.
         */
-       if (pages_number < 0 ||
-           pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
-               CERROR("%s: can't set max_read_ahead_whole_mb=%llu > max_read_ahead_per_file_mb=%lu\n",
+       if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
+               CERROR("%s: cannot set max_read_ahead_whole_mb=%llu > max_read_ahead_per_file_mb=%lu\n",
                       sbi->ll_fsname, PAGES_TO_MiB(pages_number),
                       PAGES_TO_MiB(sbi->ll_ra_info.ra_max_pages_per_file));
+
                return -ERANGE;
        }
 
@@ -463,29 +451,36 @@ ll_max_read_ahead_whole_mb_seq_write(struct file *file,
 
        return count;
 }
-
-LDEBUGFS_SEQ_FOPS(ll_max_read_ahead_whole_mb);
+LUSTRE_RW_ATTR(max_read_ahead_whole_mb);
 
 static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
 {
        struct super_block     *sb    = m->private;
        struct ll_sb_info      *sbi   = ll_s2sbi(sb);
        struct cl_client_cache *cache = sbi->ll_cache;
+       struct ll_ra_info *ra = &sbi->ll_ra_info;
        long max_cached_mb;
        long unused_mb;
 
+       mutex_lock(&cache->ccc_max_cache_mb_lock);
        max_cached_mb = PAGES_TO_MiB(cache->ccc_lru_max);
        unused_mb = PAGES_TO_MiB(atomic_long_read(&cache->ccc_lru_left));
+       mutex_unlock(&cache->ccc_max_cache_mb_lock);
+
        seq_printf(m, "users: %d\n"
                      "max_cached_mb: %ld\n"
                      "used_mb: %ld\n"
                      "unused_mb: %ld\n"
-                     "reclaim_count: %u\n",
+                     "reclaim_count: %u\n"
+                     "max_read_ahead_mb: %lu\n"
+                     "used_read_ahead_mb: %d\n",
                   atomic_read(&cache->ccc_users),
                   max_cached_mb,
                   max_cached_mb - unused_mb,
                   unused_mb,
-                  cache->ccc_lru_shrinkers);
+                  cache->ccc_lru_shrinkers,
+                  PAGES_TO_MiB(ra->ra_max_pages),
+                  PAGES_TO_MiB(atomic_read(&ra->ra_cur_pages)));
        return 0;
 }
 
@@ -501,9 +496,9 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
        long diff = 0;
        long nrpages = 0;
        __u16 refcheck;
-       __s64 pages_number;
+       u64 pages_number;
        int rc;
-       char kernbuf[128];
+       char kernbuf[128], *ptr;
 
        ENTRY;
        if (count >= sizeof(kernbuf))
@@ -511,11 +506,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
        if (copy_from_user(kernbuf, buffer, count))
                RETURN(-EFAULT);
-       kernbuf[count] = 0;
+       kernbuf[count] = '\0';
 
-       buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) -
-                 kernbuf;
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &pages_number, 'M');
+       ptr = lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count);
+       rc = sysfs_memparse(ptr, count, &pages_number, "MiB");
        if (rc)
                RETURN(rc);
 
@@ -530,9 +524,8 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
        /* Allow enough cache so clients can make well-formed RPCs */
        pages_number = max_t(long, pages_number, PTLRPC_MAX_BRW_PAGES);
 
-       spin_lock(&sbi->ll_lock);
+       mutex_lock(&cache->ccc_max_cache_mb_lock);
        diff = pages_number - cache->ccc_lru_max;
-       spin_unlock(&sbi->ll_lock);
 
        /* easy - add more LRU slots. */
        if (diff >= 0) {
@@ -542,7 +535,7 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
-               RETURN(PTR_ERR(env));
+               GOTO(out_unlock, rc = PTR_ERR(env));
 
        diff = -diff;
        while (diff > 0) {
@@ -550,17 +543,21 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
                /* reduce LRU budget from free slots. */
                do {
-                       long ov, nv;
+                       long lru_left_old, lru_left_new, lru_left_ret;
 
-                       ov = atomic_long_read(&cache->ccc_lru_left);
-                       if (ov == 0)
+                       lru_left_old = atomic_long_read(&cache->ccc_lru_left);
+                       if (lru_left_old == 0)
                                break;
 
-                       nv = ov > diff ? ov - diff : 0;
-                       rc = atomic_long_cmpxchg(&cache->ccc_lru_left, ov, nv);
-                       if (likely(ov == rc)) {
-                               diff -= ov - nv;
-                               nrpages += ov - nv;
+                       lru_left_new = lru_left_old > diff ?
+                                       lru_left_old - diff : 0;
+                       lru_left_ret =
+                               atomic_long_cmpxchg(&cache->ccc_lru_left,
+                                                   lru_left_old,
+                                                   lru_left_new);
+                       if (likely(lru_left_old == lru_left_ret)) {
+                               diff -= lru_left_old - lru_left_new;
+                               nrpages += lru_left_old - lru_left_new;
                                break;
                        }
                } while (1);
@@ -573,8 +570,11 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
                        break;
                }
 
+               /* Request extra free slots to avoid them all being used
+                * by other processes before this can continue shrinking.
+                */
+               tmp = diff + min_t(long, diff, MiB_TO_PAGES(1024));
                /* difficult - have to ask OSCs to drop LRU slots. */
-               tmp = diff << 1;
                rc = obd_set_info_async(env, sbi->ll_dt_exp,
                                sizeof(KEY_CACHE_LRU_SHRINK),
                                KEY_CACHE_LRU_SHRINK,
@@ -586,16 +586,15 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
 out:
        if (rc >= 0) {
-               spin_lock(&sbi->ll_lock);
                cache->ccc_lru_max = pages_number;
-               spin_unlock(&sbi->ll_lock);
                rc = count;
        } else {
                atomic_long_add(nrpages, &cache->ccc_lru_left);
        }
+out_unlock:
+       mutex_unlock(&cache->ccc_max_cache_mb_lock);
        return rc;
 }
-
 LDEBUGFS_SEQ_FOPS(ll_max_cached_mb);
 
 static ssize_t checksums_show(struct kobject *kobj, struct attribute *attr,
@@ -730,7 +729,7 @@ static ssize_t statahead_running_max_show(struct kobject *kobj,
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return snprintf(buf, 16, "%u\n", sbi->ll_sa_running_max);
+       return scnprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_sa_running_max);
 }
 
 static ssize_t statahead_running_max_store(struct kobject *kobj,
@@ -881,7 +880,7 @@ static ssize_t statfs_max_age_show(struct kobject *kobj, struct attribute *attr,
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_statfs_max_age);
+       return scnprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_statfs_max_age);
 }
 
 static ssize_t statfs_max_age_store(struct kobject *kobj,
@@ -918,7 +917,9 @@ static ssize_t max_easize_show(struct kobject *kobj,
        if (rc)
                return rc;
 
-       return sprintf(buf, "%u\n", ealen);
+       /* Limit xattr size returned to userspace based on kernel maximum */
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        ealen > XATTR_SIZE_MAX ? XATTR_SIZE_MAX : ealen);
 }
 LUSTRE_RO_ATTR(max_easize);
 
@@ -946,7 +947,9 @@ static ssize_t default_easize_show(struct kobject *kobj,
        if (rc)
                return rc;
 
-       return sprintf(buf, "%u\n", ealen);
+       /* Limit xattr size returned to userspace based on kernel maximum */
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        ealen > XATTR_SIZE_MAX ? XATTR_SIZE_MAX : ealen);
 }
 
 /**
@@ -992,7 +995,7 @@ LUSTRE_RW_ATTR(default_easize);
 
 static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
 {
-       const char *str[] = LL_SBI_FLAGS;
+       const char *const str[] = LL_SBI_FLAGS;
        struct super_block *sb = m->private;
        int flags = ll_s2sbi(sb)->ll_flags;
        int i = 0;
@@ -1091,34 +1094,39 @@ static ssize_t max_read_ahead_async_active_show(struct kobject *kobj,
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return snprintf(buf, PAGE_SIZE, "%u\n",
-                       sbi->ll_ra_info.ra_async_max_active);
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        sbi->ll_ra_info.ra_async_max_active);
 }
 
 static ssize_t max_read_ahead_async_active_store(struct kobject *kobj,
-                                               struct attribute *attr,
-                                               const char *buffer,
-                                               size_t count)
+                                                struct attribute *attr,
+                                                const char *buffer,
+                                                size_t count)
 {
-       unsigned int val;
-       int rc;
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
+       unsigned int val;
+       int rc;
 
        rc = kstrtouint(buffer, 10, &val);
        if (rc)
                return rc;
 
-       if (val < 1 || val > WQ_UNBOUND_MAX_ACTIVE) {
-               CERROR("%s: cannot set max_read_ahead_async_active=%u %s than %u\n",
-                      sbi->ll_fsname, val,
-                      val < 1 ? "smaller" : "larger",
-                      val < 1 ? 1 : WQ_UNBOUND_MAX_ACTIVE);
+       /*
+        * It doesn't make any sense to make it exceed what the
+        * workqueue could actually support. This can easily
+        * oversubscribe the cores, but Lustre internally
+        * throttles to avoid those impacts.
+        */
+       if (val > WQ_UNBOUND_MAX_ACTIVE) {
+               CERROR("%s: cannot set max_read_ahead_async_active=%u larger than %u\n",
+                      sbi->ll_fsname, val, WQ_UNBOUND_MAX_ACTIVE);
                return -ERANGE;
        }
 
+       spin_lock(&sbi->ll_lock);
        sbi->ll_ra_info.ra_async_max_active = val;
-       workqueue_set_max_active(sbi->ll_ra_info.ll_readahead_wq, val);
+       spin_unlock(&sbi->ll_lock);
 
        return count;
 }
@@ -1131,8 +1139,8 @@ static ssize_t read_ahead_async_file_threshold_mb_show(struct kobject *kobj,
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return snprintf(buf, PAGE_SIZE, "%lu\n",
-            PAGES_TO_MiB(sbi->ll_ra_info.ra_async_pages_per_file_threshold));
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", PAGES_TO_MiB(
+                        sbi->ll_ra_info.ra_async_pages_per_file_threshold));
 }
 
 static ssize_t
@@ -1165,6 +1173,51 @@ read_ahead_async_file_threshold_mb_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(read_ahead_async_file_threshold_mb);
 
+static ssize_t read_ahead_range_kb_show(struct kobject *kobj,
+                                       struct attribute *attr, char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       return scnprintf(buf, PAGE_SIZE, "%lu\n",
+                        sbi->ll_ra_info.ra_range_pages << (PAGE_SHIFT - 10));
+}
+
+static ssize_t
+read_ahead_range_kb_store(struct kobject *kobj,
+                              struct attribute *attr,
+                              const char *buffer, size_t count)
+{
+       unsigned long pages_number;
+       unsigned long max_ra_per_file;
+       u64 val;
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       int rc;
+
+       rc = sysfs_memparse(buffer, count, &val, "KiB");
+       if (rc < 0)
+               return rc;
+
+       pages_number = val >> PAGE_SHIFT;
+       /* Disable mmap range read */
+       if (pages_number == 0)
+               goto out;
+
+       max_ra_per_file = sbi->ll_ra_info.ra_max_pages_per_file;
+       if (pages_number > max_ra_per_file ||
+           pages_number < RA_MIN_MMAP_RANGE_PAGES)
+               return -ERANGE;
+
+out:
+       spin_lock(&sbi->ll_lock);
+       sbi->ll_ra_info.ra_range_pages = pages_number;
+       spin_unlock(&sbi->ll_lock);
+
+       return count;
+}
+LUSTRE_RW_ATTR(read_ahead_range_kb);
+
 static ssize_t fast_read_show(struct kobject *kobj,
                              struct attribute *attr,
                              char *buf)
@@ -1207,8 +1260,8 @@ static ssize_t file_heat_show(struct kobject *kobj,
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return snprintf(buf, PAGE_SIZE, "%u\n",
-                       !!(sbi->ll_flags & LL_SBI_FILE_HEAT));
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        !!(sbi->ll_flags & LL_SBI_FILE_HEAT));
 }
 
 static ssize_t file_heat_store(struct kobject *kobj,
@@ -1243,8 +1296,8 @@ static ssize_t heat_decay_percentage_show(struct kobject *kobj,
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return snprintf(buf, PAGE_SIZE, "%u\n",
-                      (sbi->ll_heat_decay_weight * 100 + 128) / 256);
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        (sbi->ll_heat_decay_weight * 100 + 128) / 256);
 }
 
 static ssize_t heat_decay_percentage_store(struct kobject *kobj,
@@ -1277,7 +1330,7 @@ static ssize_t heat_period_second_show(struct kobject *kobj,
        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
                                              ll_kset.kobj);
 
-       return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_heat_period_second);
+       return scnprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_heat_period_second);
 }
 
 static ssize_t heat_period_second_store(struct kobject *kobj,
@@ -1457,17 +1510,11 @@ out_free_kernbuff:
        OBD_FREE(kernbuf, count + 1);
        return rc ? rc : count;
 }
-LPROC_SEQ_FOPS(ll_pcc);
+LDEBUGFS_SEQ_FOPS(ll_pcc);
 
-struct lprocfs_vars lprocfs_llite_obd_vars[] = {
+struct ldebugfs_vars lprocfs_llite_obd_vars[] = {
        { .name =       "site",
          .fops =       &ll_site_stats_fops                     },
-       { .name =       "max_read_ahead_mb",
-         .fops =       &ll_max_readahead_mb_fops               },
-       { .name =       "max_read_ahead_per_file_mb",
-         .fops =       &ll_max_readahead_per_file_mb_fops      },
-       { .name =       "max_read_ahead_whole_mb",
-         .fops =       &ll_max_read_ahead_whole_mb_fops        },
        { .name =       "max_cached_mb",
          .fops =       &ll_max_cached_mb_fops                  },
        { .name =       "statahead_stats",
@@ -1496,10 +1543,20 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_filestotal.attr,
        &lustre_attr_filesfree.attr,
        &lustre_attr_client_type.attr,
+       &lustre_attr_foreign_symlink_enable.attr,
+       &lustre_attr_foreign_symlink_prefix.attr,
+       &lustre_attr_foreign_symlink_upcall.attr,
+       &lustre_attr_foreign_symlink_upcall_info.attr,
        &lustre_attr_fstype.attr,
        &lustre_attr_uuid.attr,
        &lustre_attr_checksums.attr,
        &lustre_attr_checksum_pages.attr,
+       &lustre_attr_max_read_ahead_mb.attr,
+       &lustre_attr_max_read_ahead_per_file_mb.attr,
+       &lustre_attr_max_read_ahead_whole_mb.attr,
+       &lustre_attr_max_read_ahead_async_active.attr,
+       &lustre_attr_read_ahead_async_file_threshold_mb.attr,
+       &lustre_attr_read_ahead_range_kb.attr,
        &lustre_attr_stats_track_pid.attr,
        &lustre_attr_stats_track_ppid.attr,
        &lustre_attr_stats_track_gid.attr,
@@ -1516,8 +1573,6 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_file_heat.attr,
        &lustre_attr_heat_decay_percentage.attr,
        &lustre_attr_heat_period_second.attr,
-       &lustre_attr_max_read_ahead_async_active.attr,
-       &lustre_attr_read_ahead_async_file_threshold_mb.attr,
        NULL,
 };
 
@@ -1535,61 +1590,62 @@ static struct kobj_type sbi_ktype = {
 };
 
 static const struct llite_file_opcode {
-        __u32       opcode;
-        __u32       type;
-        const char *opname;
+       __u32           opcode;
+       __u32           type;
+       const char      *opname;
 } llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
-        /* file operation */
-        { LPROC_LL_READ_BYTES,     LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
-                                   "read_bytes" },
-        { LPROC_LL_WRITE_BYTES,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
-                                   "write_bytes" },
-        { LPROC_LL_IOCTL,          LPROCFS_TYPE_REGS, "ioctl" },
-        { LPROC_LL_OPEN,           LPROCFS_TYPE_REGS, "open" },
-        { LPROC_LL_RELEASE,        LPROCFS_TYPE_REGS, "close" },
-        { LPROC_LL_MAP,            LPROCFS_TYPE_REGS, "mmap" },
-       { LPROC_LL_FAULT,          LPROCFS_TYPE_REGS, "page_fault" },
-       { LPROC_LL_MKWRITE,        LPROCFS_TYPE_REGS, "page_mkwrite" },
-        { LPROC_LL_LLSEEK,         LPROCFS_TYPE_REGS, "seek" },
-        { LPROC_LL_FSYNC,          LPROCFS_TYPE_REGS, "fsync" },
-        { LPROC_LL_READDIR,        LPROCFS_TYPE_REGS, "readdir" },
-        /* inode operation */
-        { LPROC_LL_SETATTR,        LPROCFS_TYPE_REGS, "setattr" },
-        { LPROC_LL_TRUNC,          LPROCFS_TYPE_REGS, "truncate" },
-        { LPROC_LL_FLOCK,          LPROCFS_TYPE_REGS, "flock" },
-        { LPROC_LL_GETATTR,        LPROCFS_TYPE_REGS, "getattr" },
-        /* dir inode operation */
-        { LPROC_LL_CREATE,         LPROCFS_TYPE_REGS, "create" },
-        { LPROC_LL_LINK,           LPROCFS_TYPE_REGS, "link" },
-        { LPROC_LL_UNLINK,         LPROCFS_TYPE_REGS, "unlink" },
-        { LPROC_LL_SYMLINK,        LPROCFS_TYPE_REGS, "symlink" },
-        { LPROC_LL_MKDIR,          LPROCFS_TYPE_REGS, "mkdir" },
-        { LPROC_LL_RMDIR,          LPROCFS_TYPE_REGS, "rmdir" },
-        { LPROC_LL_MKNOD,          LPROCFS_TYPE_REGS, "mknod" },
-        { LPROC_LL_RENAME,         LPROCFS_TYPE_REGS, "rename" },
+       /* file operation */
+       { LPROC_LL_READ_BYTES,  LPROCFS_TYPE_BYTES_FULL, "read_bytes" },
+       { LPROC_LL_WRITE_BYTES, LPROCFS_TYPE_BYTES_FULL, "write_bytes" },
+       { LPROC_LL_READ,        LPROCFS_TYPE_LATENCY,   "read" },
+       { LPROC_LL_WRITE,       LPROCFS_TYPE_LATENCY,   "write" },
+       { LPROC_LL_IOCTL,       LPROCFS_TYPE_REQS,      "ioctl" },
+       { LPROC_LL_OPEN,        LPROCFS_TYPE_LATENCY,   "open" },
+       { LPROC_LL_RELEASE,     LPROCFS_TYPE_LATENCY,   "close" },
+       { LPROC_LL_MMAP,        LPROCFS_TYPE_LATENCY,   "mmap" },
+       { LPROC_LL_FAULT,       LPROCFS_TYPE_LATENCY,   "page_fault" },
+       { LPROC_LL_MKWRITE,     LPROCFS_TYPE_LATENCY,   "page_mkwrite" },
+       { LPROC_LL_LLSEEK,      LPROCFS_TYPE_LATENCY,   "seek" },
+       { LPROC_LL_FSYNC,       LPROCFS_TYPE_LATENCY,   "fsync" },
+       { LPROC_LL_READDIR,     LPROCFS_TYPE_LATENCY,   "readdir" },
+       /* inode operation */
+       { LPROC_LL_SETATTR,     LPROCFS_TYPE_LATENCY,   "setattr" },
+       { LPROC_LL_TRUNC,       LPROCFS_TYPE_LATENCY,   "truncate" },
+       { LPROC_LL_FLOCK,       LPROCFS_TYPE_LATENCY,   "flock" },
+       { LPROC_LL_GETATTR,     LPROCFS_TYPE_LATENCY,   "getattr" },
+       { LPROC_LL_FALLOCATE,   LPROCFS_TYPE_LATENCY, "fallocate"},
+       /* dir inode operation */
+       { LPROC_LL_CREATE,      LPROCFS_TYPE_LATENCY,   "create" },
+       { LPROC_LL_LINK,        LPROCFS_TYPE_LATENCY,   "link" },
+       { LPROC_LL_UNLINK,      LPROCFS_TYPE_LATENCY,   "unlink" },
+       { LPROC_LL_SYMLINK,     LPROCFS_TYPE_LATENCY,   "symlink" },
+       { LPROC_LL_MKDIR,       LPROCFS_TYPE_LATENCY,   "mkdir" },
+       { LPROC_LL_RMDIR,       LPROCFS_TYPE_LATENCY,   "rmdir" },
+       { LPROC_LL_MKNOD,       LPROCFS_TYPE_LATENCY,   "mknod" },
+       { LPROC_LL_RENAME,      LPROCFS_TYPE_LATENCY,   "rename" },
        /* special inode operation */
-       { LPROC_LL_STATFS,          LPROCFS_TYPE_REGS, "statfs" },
-       { LPROC_LL_ALLOC_INODE,    LPROCFS_TYPE_REGS, "alloc_inode" },
-       { LPROC_LL_SETXATTR,       LPROCFS_TYPE_REGS, "setxattr" },
-       { LPROC_LL_GETXATTR,       LPROCFS_TYPE_REGS, "getxattr" },
-       { LPROC_LL_GETXATTR_HITS,  LPROCFS_TYPE_REGS, "getxattr_hits" },
-       { LPROC_LL_LISTXATTR,      LPROCFS_TYPE_REGS, "listxattr" },
-       { LPROC_LL_REMOVEXATTR,    LPROCFS_TYPE_REGS, "removexattr" },
-       { LPROC_LL_INODE_PERM,     LPROCFS_TYPE_REGS, "inode_permission" },
+       { LPROC_LL_STATFS,      LPROCFS_TYPE_LATENCY,   "statfs" },
+       { LPROC_LL_SETXATTR,    LPROCFS_TYPE_LATENCY,   "setxattr" },
+       { LPROC_LL_GETXATTR,    LPROCFS_TYPE_LATENCY,   "getxattr" },
+       { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REQS,    "getxattr_hits" },
+       { LPROC_LL_LISTXATTR,   LPROCFS_TYPE_LATENCY,   "listxattr" },
+       { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_LATENCY,   "removexattr" },
+       { LPROC_LL_INODE_PERM,  LPROCFS_TYPE_LATENCY,   "inode_permission" },
 };
 
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, long count)
 {
-        if (!sbi->ll_stats)
-                return;
-        if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
-        else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
-                 sbi->ll_stats_track_id == current->pid)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
-        else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
-                 sbi->ll_stats_track_id == current->parent->pid)
-                lprocfs_counter_add(sbi->ll_stats, op, count);
+       if (!sbi->ll_stats)
+               return;
+
+       if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
+       else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
+                sbi->ll_stats_track_id == current->pid)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
+       else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
+                sbi->ll_stats_track_id == current->parent->pid)
+               lprocfs_counter_add(sbi->ll_stats, op, count);
        else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
                 sbi->ll_stats_track_id ==
                        from_kgid(&init_user_ns, current_gid()))
@@ -1597,7 +1653,7 @@ void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
 }
 EXPORT_SYMBOL(ll_stats_ops_tally);
 
-static const char *ra_stat_string[] = {
+static const char *const ra_stat_string[] = {
        [RA_STAT_HIT] = "hits",
        [RA_STAT_MISS] = "misses",
        [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
@@ -1613,13 +1669,14 @@ static const char *ra_stat_string[] = {
        [RA_STAT_FAILED_REACH_END] = "failed to reach end",
        [RA_STAT_ASYNC] = "async readahead",
        [RA_STAT_FAILED_FAST_READ] = "failed to fast read",
+       [RA_STAT_MMAP_RANGE_READ] = "mmap range read",
 };
 
 int ll_debugfs_register_super(struct super_block *sb, const char *name)
 {
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct ll_sb_info *sbi = ll_s2sbi(sb);
-       int err, id, rc;
+       int err, id;
 
        ENTRY;
        LASSERT(sbi);
@@ -1627,35 +1684,21 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
        if (IS_ERR_OR_NULL(llite_root))
                goto out_ll_kset;
 
-       sbi->ll_debugfs_entry = ldebugfs_register(name, llite_root,
-                                                 lprocfs_llite_obd_vars, sb);
-       if (IS_ERR_OR_NULL(sbi->ll_debugfs_entry)) {
-               err = sbi->ll_debugfs_entry ? PTR_ERR(sbi->ll_debugfs_entry) :
-                                             -ENOMEM;
-               sbi->ll_debugfs_entry = NULL;
-               RETURN(err);
-       }
+       sbi->ll_debugfs_entry = debugfs_create_dir(name, llite_root);
+       ldebugfs_add_vars(sbi->ll_debugfs_entry, lprocfs_llite_obd_vars, sb);
 
-       rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "dump_page_cache",0444,
-                                &vvp_dump_pgcache_file_ops, sbi);
-       if (rc)
-               CWARN("Error adding the dump_page_cache file\n");
+       debugfs_create_file("dump_page_cache", 0444, sbi->ll_debugfs_entry, sbi,
+                           &vvp_dump_pgcache_file_ops);
 
-       rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "extents_stats", 0644,
-                                &ll_rw_extents_stats_fops, sbi);
-       if (rc)
-               CWARN("Error adding the extent_stats file\n");
+       debugfs_create_file("extents_stats", 0644, sbi->ll_debugfs_entry, sbi,
+                                &ll_rw_extents_stats_fops);
 
-       rc = ldebugfs_seq_create(sbi->ll_debugfs_entry,
-                                "extents_stats_per_process", 0644,
-                                &ll_rw_extents_stats_pp_fops, sbi);
-       if (rc)
-               CWARN("Error adding the extents_stats_per_process file\n");
+       debugfs_create_file("extents_stats_per_process", 0644,
+                           sbi->ll_debugfs_entry, sbi,
+                           &ll_rw_extents_stats_pp_fops);
 
-       rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "offset_stats", 0644,
-                                &ll_rw_offset_stats_fops, sbi);
-       if (rc)
-               CWARN("Error adding the offset_stats file\n");
+       debugfs_create_file("offset_stats", 0644, sbi->ll_debugfs_entry, sbi,
+                           &ll_rw_offset_stats_fops);
 
        /* File operations stats */
        sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES,
@@ -1666,24 +1709,21 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
        /* do counter init */
        for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
                u32 type = llite_opcode_table[id].type;
-               void *ptr = NULL;
+               void *ptr = "unknown";
 
-               if (type & LPROCFS_TYPE_REGS)
-                       ptr = "regs";
+               if (type & LPROCFS_TYPE_REQS)
+                       ptr = "reqs";
                else if (type & LPROCFS_TYPE_BYTES)
                        ptr = "bytes";
-               else if (type & LPROCFS_TYPE_PAGES)
-                       ptr = "pages";
+               else if (type & LPROCFS_TYPE_USEC)
+                       ptr = "usec";
                lprocfs_counter_init(sbi->ll_stats,
-                                    llite_opcode_table[id].opcode,
-                                    (type & LPROCFS_CNTR_AVGMINMAX),
+                                    llite_opcode_table[id].opcode, type,
                                     llite_opcode_table[id].opname, ptr);
        }
 
-       err = ldebugfs_register_stats(sbi->ll_debugfs_entry, "stats",
-                                     sbi->ll_stats);
-       if (err)
-               GOTO(out_stats, err);
+       debugfs_create_file("stats", 0644, sbi->ll_debugfs_entry,
+                           sbi->ll_stats, &ldebugfs_stats_seq_fops);
 
        sbi->ll_ra_stats = lprocfs_alloc_stats(ARRAY_SIZE(ra_stat_string),
                                               LPROCFS_STATS_FLAG_NONE);
@@ -1694,10 +1734,8 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
                lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
                                     ra_stat_string[id], "pages");
 
-       err = ldebugfs_register_stats(sbi->ll_debugfs_entry, "read_ahead_stats",
-                                     sbi->ll_ra_stats);
-       if (err)
-               GOTO(out_ra_stats, err);
+       debugfs_create_file("read_ahead_stats", 0644, sbi->ll_debugfs_entry,
+                           sbi->ll_ra_stats, &ldebugfs_stats_seq_fops);
 
 out_ll_kset:
        /* Yes we also register sysfs mount kset here as well */
@@ -1720,7 +1758,7 @@ out_ra_stats:
 out_stats:
        lprocfs_free_stats(&sbi->ll_stats);
 out_debugfs:
-       ldebugfs_remove(&sbi->ll_debugfs_entry);
+       debugfs_remove_recursive(sbi->ll_debugfs_entry);
 
        RETURN(err);
 }
@@ -1730,8 +1768,7 @@ void ll_debugfs_unregister_super(struct super_block *sb)
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct ll_sb_info *sbi = ll_s2sbi(sb);
 
-       if (!IS_ERR_OR_NULL(sbi->ll_debugfs_entry))
-               ldebugfs_remove(&sbi->ll_debugfs_entry);
+       debugfs_remove_recursive(sbi->ll_debugfs_entry);
 
        if (sbi->ll_dt_obd)
                sysfs_remove_link(&sbi->ll_kset.kobj,
@@ -1769,26 +1806,26 @@ static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
                 write_tot += pp_info->pp_w_hist.oh_buckets[i];
         }
 
-        for(i = 0; i < LL_HIST_MAX; i++) {
-                r = pp_info->pp_r_hist.oh_buckets[i];
-                w = pp_info->pp_w_hist.oh_buckets[i];
-                read_cum += r;
-                write_cum += w;
-               end = BIT(i + LL_HIST_START - units);
+       for(i = 0; i < LL_HIST_MAX; i++) {
+               r = pp_info->pp_r_hist.oh_buckets[i];
+               w = pp_info->pp_w_hist.oh_buckets[i];
+               read_cum += r;
+               write_cum += w;
+               end = 1 << (i + LL_HIST_START - units);
                seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4u %4u  | "
                           "%14lu %4u %4u\n", start, *unitp, end, *unitp,
-                           (i == LL_HIST_MAX - 1) ? '+' : ' ',
-                           r, pct(r, read_tot), pct(read_cum, read_tot),
-                           w, pct(w, write_tot), pct(write_cum, write_tot));
-                start = end;
-               if (start == BIT(10)) {
-                        start = 1;
-                        units += 10;
-                        unitp++;
-                }
-                if (read_cum == read_tot && write_cum == write_tot)
-                        break;
-        }
+                          (i == LL_HIST_MAX - 1) ? '+' : ' ',
+                          r, pct(r, read_tot), pct(read_cum, read_tot),
+                          w, pct(w, write_tot), pct(write_cum, write_tot));
+               start = end;
+               if (start == (1 << 10)) {
+                       start = 1;
+                       units += 10;
+                       unitp++;
+               }
+               if (read_cum == read_tot && write_cum == write_tot)
+                       break;
+       }
 }
 
 static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
@@ -1949,15 +1986,15 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
                 lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
         }
 
-       for(i = 0; (count >= BIT(LL_HIST_START << i)) &&
-             (i < (LL_HIST_MAX - 1)); i++);
-        if (rw == 0) {
-                io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
-        } else {
-                io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
-                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
-        }
+       for (i = 0; (count >= 1 << (LL_HIST_START + i)) &&
+            (i < (LL_HIST_MAX - 1)); i++);
+       if (rw == 0) {
+               io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+       } else {
+               io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+               io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+       }
        spin_unlock(&sbi->ll_pp_extent_lock);
 
        spin_lock(&sbi->ll_process_lock);
@@ -2043,7 +2080,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
                if (offset[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   offset[i].rw_op == READ ? 'R' : 'W',
                                   offset[i].rw_pid,
                                   offset[i].rw_range_start,
@@ -2057,7 +2094,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
                if (process[i].rw_pid != 0)
                        seq_printf(seq,
-                                 "%3c %10d %14llu %14llu %17lu %17lu %14llu\n",
+                                 "%3c %10d %14llu %14llu %17lu %17lu %14lld\n",
                                   process[i].rw_op == READ ? 'R' : 'W',
                                   process[i].rw_pid,
                                   process[i].rw_range_start,