Whamcloud - gitweb
LU-10602 llite: add file heat support 99/34399/10
authorLi Xi <lixi@ddn.com>
Mon, 5 Feb 2018 03:57:54 +0000 (22:57 -0500)
committerOleg Drokin <green@whamcloud.com>
Mon, 1 Apr 2019 07:24:16 +0000 (07:24 +0000)
File heat is a special attribute of files/objects which reflects
the access frequency of the files/objects.
File heat is mainly designed for cache management. Caches like
PCC can use file heat to determine which files to be removed from
the cache or which files to fetch into cache.
This patch adds file heat support on llite level.

Signed-off-by: Li Xi <lixi@ddn.com>
Signed-off-by: Qian Yingjin <qian@ddn.com>
Change-Id: I168fc657f0c859311e5114191b60047646909be0
Reviewed-on: https://review.whamcloud.com/34399
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
17 files changed:
lustre/doc/Makefile.am
lustre/doc/lfs-heat.1 [new file with mode: 0644]
lustre/doc/llapi_heat_get.3 [new file with mode: 0644]
lustre/doc/llapi_heat_set.3 [new file with mode: 0644]
lustre/include/lustre/lustreapi.h
lustre/include/obd_class.h
lustre/include/obd_support.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c
lustre/obdclass/class_obd.c
lustre/tests/sanity.sh
lustre/utils/Makefile.am
lustre/utils/lfs.c
lustre/utils/liblustreapi_heat.c [new file with mode: 0644]

index 628326e..349df82 100644 (file)
@@ -42,6 +42,7 @@ MANFILES =                                    \
        lfs-find.1                              \
        lfs-getstripe.1                         \
        lfs-getdirstripe.1                      \
+       lfs-heat.1                              \
        lfs-hsm.1                               \
        lfs-ladvise.1                           \
        lfs_migrate.1                           \
@@ -66,6 +67,8 @@ MANFILES =                                    \
        llapi_file_open.3                       \
        llapi_group_lock.3                      \
        llapi_group_unlock.3                    \
+       llapi_heat_get.3                        \
+       llapi_heat_set.3                        \
        llapi_hsm_action_begin.3                \
        llapi_hsm_action_end.3                  \
        llapi_hsm_action_get_dfid.3             \
diff --git a/lustre/doc/lfs-heat.1 b/lustre/doc/lfs-heat.1
new file mode 100644 (file)
index 0000000..752a182
--- /dev/null
@@ -0,0 +1,69 @@
+.TH lfs-heat 1 "Feb. 09, 2019" Lustre "Lustre utility"
+.SH NAME
+.B lfs heat_command
+lfs commands used to interact with file heat features
+.SH SYNOPSIS
+.B lfs heat_get|heat_set
+.IR \fR<\fIFILE \fR...>
+.br
+.SH DESCRIPTION
+These are a set of lfs commands used to interact with Lustre file heat feature.
+Currently file heat is only stored in memory with file inode, it might be reset
+to be zero at any time with the release of the inode due to memory reclaim.
+.TP
+.B lfs heat_get  \fR<\fIFILE \fR...>
+Get file heat on file list.
+.TP
+.B lfs heat_set [\fB--clear\fR|\fB-c\fR] [\fB--off\fR|\fB-o\fR] [\fB--on\fR|\fB-O\fR] \fR<\fIFILE \fR...>
+Set provided file heat flags on file list.
+.SH OPTIONS
+.TP
+.BR --clear | -c
+Clear file heat on given files.
+.TP
+.BR --off | -o
+Turn off file heat on given files.
+.TP
+.BR --on | -O
+Turn on file heat on given files.
+.SH EXAMPLES
+.TP
+Turn on file heat support for the Lustre filesystem:
+.B $ lctl set_param llite.$FSNAME*.file_heat=1
+.TP
+Turn off file heat support for the Lustre filesystem:
+.B $ lctl set_param llite.$FSNAME*.file_heat=0
+.TP
+Display current file heat for foo:
+.B $ lfs heat_get /mnt/lustre/foo
+.br
+flags: 0
+.br
+readsample: 0
+.br
+writesample: 16
+.br
+readbyte: 0
+.br
+writebyte: 16777216
+.br
+
+.TP
+Clear the file heat for foo:
+.B $ lfs heat_set -c /mnt/lustre/foo
+.TP
+Turn off file heat for foo:
+.B $ lfs heat_set -o /mnt/lustre/foo
+.TP
+Turn on file heat for foo:
+.B $ lfs heat_set -O /mnt/lustre/foo
+.SH AUTHOR
+The
+.B lfs heat
+command is part of the
+.BR Lustre (7)
+filesystem.
+
+.SH SEE ALSO
+.BR lfs (1)
+.BR lustre (7)
diff --git a/lustre/doc/llapi_heat_get.3 b/lustre/doc/llapi_heat_get.3
new file mode 100644 (file)
index 0000000..840d639
--- /dev/null
@@ -0,0 +1,65 @@
+.TH llapi_heat_get 3 "2019 Feb 09" "Lustre User API"
+.SH NAME
+llapi_heat_get, llapi_heat_set \- get heat and set heat flags for a file
+.SH SYNOPSIS
+.nf
+.B #include <lustre/lustreapi.h>
+.PP
+.BI "int llapi_heat_get(int " fd ", struct lu_heat *" heat ");"
+
+.BI "int llapi_heat_set(int " fd ", __u64 " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+The function
+.B llapi_heat_get()
+returns file access frequency information on the file descriptor
+.BR fd .
+Information is returned in the
+.I heat
+argument which should already be allocated,  which is a
+.B lu_heat
+data structure, which contains the following fields:
+.nf
+.LP
+struct lu_heat {
+       __u32 lh_count;
+       __u32 lh_flags;
+       __u64 lh_heat[0];
+};
+.fi
+The function
+.B llapi_heat_set()
+mainly clears the heat information on the file descriptor
+.I fd
+according to the parameter
+.I flags
+which gives options for file heat, currently one of:
+.TP
+LU_HEAT_FLAG_CLEAR
+Clear the heat information for a given file.
+.TP
+LU_HEAT_FLAG_OFF
+Turn off the file heat support for a given file.
+
+.SH RETURN VALUES
+.LP
+.B llapi_heat_get()
+and
+.B llapi_heat_set()
+return 0 on success or a negative errno value on failure.
+.SH ERRORS
+.TP 15
+.SM -ENOMEM
+Insufficient memory to complete operation.
+.TP
+.SM -EFAULT
+Memory region is not properly mapped.
+.TP
+.SM -EINVAL
+One or more invalid arguments are given.
+.TP
+.SM -EOPNOTSUPP
+File heat operation is not supported.
+.SH "SEE ALSO"
+.BR lustreapi (7)
diff --git a/lustre/doc/llapi_heat_set.3 b/lustre/doc/llapi_heat_set.3
new file mode 100644 (file)
index 0000000..0483e0f
--- /dev/null
@@ -0,0 +1 @@
+.so man3/llapi_heat_get.3
index 87762b4..ba1e603 100644 (file)
@@ -1008,6 +1008,9 @@ ssize_t llapi_mirror_copy_many(int fd, __u16 src, __u16 *dst, size_t count);
 int llapi_mirror_copy(int fd, unsigned int src, unsigned int dst,
                       off_t pos, size_t count);
 
+int llapi_heat_get(int fd, struct lu_heat *heat);
+int llapi_heat_set(int fd, __u64 flags);
+
 /** @} llapi */
 
 #if defined(__cplusplus)
index cd3f0b5..18e1628 100644 (file)
@@ -1844,4 +1844,15 @@ extern struct miscdevice obd_psdev;
 int obd_ioctl_getdata(char **buf, int *len, void __user *arg);
 int class_procfs_init(void);
 int class_procfs_clean(void);
+
+extern void obd_heat_add(struct obd_heat_instance *instance,
+                        unsigned int time_second, __u64 count,
+                        unsigned int weight, unsigned int period_second);
+extern void obd_heat_decay(struct obd_heat_instance *instance,
+                          __u64 time_second, unsigned int weight,
+                          unsigned int period_second);
+extern __u64 obd_heat_get(struct obd_heat_instance *instance,
+                         unsigned int time_second, unsigned int weight,
+                         unsigned int period_second);
+extern void obd_heat_clear(struct obd_heat_instance *instance, int count);
 #endif /* __LINUX_OBD_CLASS_H */
index bf32221..0d522cb 100644 (file)
@@ -945,4 +945,10 @@ static inline int lustre_to_lma_flags(__u32 la_flags)
        return (la_flags & LUSTRE_ORPHAN_FL) ? LMAI_ORPHAN : 0;
 }
 
+struct obd_heat_instance { /* one decaying heat counter */
+       __u64 ohi_heat; /* heat value carried over from the completed periods */
+       __u64 ohi_time_second; /* start (seconds) of the current period; 0 = nothing recorded yet */
+       __u64 ohi_count; /* amount added during the current period, folded into ohi_heat on rollover */
+};
+
 #endif
index a99aaa2..a3bd790 100644 (file)
@@ -477,6 +477,8 @@ struct ll_ioc_lease_id {
 #define LL_IOC_FID2MDTIDX              _IOWR('f', 248, struct lu_fid)
 #define LL_IOC_GETPARENT               _IOWR('f', 249, struct getparent)
 #define LL_IOC_LADVISE                 _IOR('f', 250, struct llapi_lu_ladvise)
+#define LL_IOC_HEAT_GET                        _IOWR('f', 251, struct lu_heat)
+/* The handler copies a __u64 flag mask from user space, so encode that size */
+#define LL_IOC_HEAT_SET                        _IOW('f', 252, __u64)
 
 #ifndef        FS_IOC_FSGETXATTR
 /*
@@ -2186,6 +2188,36 @@ enum lockahead_results {
        LLA_RESULT_SAME,
 };
 
+enum lu_heat_flag_bit { /* bit positions used to build the LU_HEAT_FLAG_* masks */
+       LU_HEAT_FLAG_BIT_INVALID = 0,
+       LU_HEAT_FLAG_BIT_OFF,
+       LU_HEAT_FLAG_BIT_CLEAR,
+};
+
+#define LU_HEAT_FLAG_CLEAR     (1 << LU_HEAT_FLAG_BIT_CLEAR) /* reset the file's heat counters */
+#define LU_HEAT_FLAG_OFF       (1 << LU_HEAT_FLAG_BIT_OFF) /* stop heat accounting for the file */
+
+enum obd_heat_type { /* index into the per-inode heat instances and lu_heat::lh_heat[] */
+       OBD_HEAT_READSAMPLE     = 0, /* +1 per recorded read I/O */
+       OBD_HEAT_WRITESAMPLE    = 1, /* +1 per recorded write I/O */
+       OBD_HEAT_READBYTE       = 2, /* + bytes of each recorded read */
+       OBD_HEAT_WRITEBYTE      = 3, /* + bytes of each recorded write */
+       OBD_HEAT_COUNT /* number of heat types; keep last */
+};
+
+#define LU_HEAT_NAMES {                                        \
+       [OBD_HEAT_READSAMPLE]   = "readsample",         \
+       [OBD_HEAT_WRITESAMPLE]  = "writesample",        \
+       [OBD_HEAT_READBYTE]     = "readbyte",           \
+       [OBD_HEAT_WRITEBYTE]    = "writebyte",          \
+} /* display names, indexed by enum obd_heat_type */
+
+struct lu_heat { /* argument of LL_IOC_HEAT_GET */
+       __u32 lh_count; /* in: entries available in lh_heat[]; the kernel caps it at OBD_HEAT_COUNT */
+       __u32 lh_flags; /* out: the file's LU_HEAT_FLAG_* state */
+       __u64 lh_heat[0]; /* out: heat values, indexed by enum obd_heat_type */
+};
+
 #if defined(__cplusplus)
 }
 #endif
index b429b2b..33da782 100644 (file)
@@ -1369,6 +1369,37 @@ static void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot)
        ll_io_set_mirror(io, file);
 }
 
+/*
+ * Account one completed read or write of @count bytes in the heat counters
+ * of @inode.  Does nothing when file heat is disabled for the mount or for
+ * this file, or when @iot is neither CIT_READ nor CIT_WRITE.
+ */
+static void ll_heat_add(struct inode *inode, enum cl_io_type iot,
+                       __u64 count)
+{
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct obd_heat_instance *samples;
+       struct obd_heat_instance *bytes;
+       __u64 now;
+
+       if (!ll_sbi_has_file_heat(sbi))
+               return;
+       if (lli->lli_heat_flags & LU_HEAT_FLAG_OFF)
+               return;
+
+       /* pick the sample/byte counter pair matching the I/O direction */
+       switch (iot) {
+       case CIT_READ:
+               samples = &lli->lli_heat_instances[OBD_HEAT_READSAMPLE];
+               bytes = &lli->lli_heat_instances[OBD_HEAT_READBYTE];
+               break;
+       case CIT_WRITE:
+               samples = &lli->lli_heat_instances[OBD_HEAT_WRITESAMPLE];
+               bytes = &lli->lli_heat_instances[OBD_HEAT_WRITEBYTE];
+               break;
+       default:
+               return;
+       }
+
+       now = ktime_get_real_seconds();
+
+       /* one sample plus @count bytes, both updated under the heat lock */
+       spin_lock(&lli->lli_heat_lock);
+       obd_heat_add(samples, now, 1,
+                    sbi->ll_heat_decay_weight, sbi->ll_heat_period_second);
+       obd_heat_add(bytes, now, count,
+                    sbi->ll_heat_decay_weight, sbi->ll_heat_period_second);
+       spin_unlock(&lli->lli_heat_lock);
+}
+
 static ssize_t
 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
                   struct file *file, enum cl_io_type iot,
@@ -1499,6 +1530,8 @@ out:
        }
 
        CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
+       if (result > 0)
+               ll_heat_add(inode, iot, result);
 
        RETURN(result > 0 ? result : rc);
 }
@@ -1559,9 +1592,11 @@ ll_do_fast_read(struct kiocb *iocb, struct iov_iter *iter)
        if (result == -ENODATA)
                result = 0;
 
-       if (result > 0)
+       if (result > 0) {
+               ll_heat_add(file_inode(iocb->ki_filp), CIT_READ, result);
                ll_stats_ops_tally(ll_i2sbi(file_inode(iocb->ki_filp)),
                                LPROC_LL_READ_BYTES, result);
+       }
 
        return result;
 }
@@ -1649,6 +1684,7 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter)
                result = 0;
 
        if (result > 0) {
+               ll_heat_add(inode, CIT_WRITE, result);
                ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
                                   result);
                ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED);
@@ -3238,6 +3274,41 @@ static long ll_file_set_lease(struct file *file, struct ll_ioc_lease *ioc,
        RETURN(rc);
 }
 
+/*
+ * Fill @heat with the heat flags of @inode and with the current value of the
+ * first heat->lh_count heat instances.  The caller sets heat->lh_count and
+ * must keep it within OBD_HEAT_COUNT, the size of lli_heat_instances[].
+ */
+static void ll_heat_get(struct inode *inode, struct lu_heat *heat)
+{
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       __u64 now = ktime_get_real_seconds();
+       int idx = 0;
+
+       spin_lock(&lli->lli_heat_lock);
+       heat->lh_flags = lli->lli_heat_flags;
+       while (idx < heat->lh_count) {
+               struct obd_heat_instance *ohi = &lli->lli_heat_instances[idx];
+
+               heat->lh_heat[idx] = obd_heat_get(ohi, now,
+                                                 sbi->ll_heat_decay_weight,
+                                                 sbi->ll_heat_period_second);
+               idx++;
+       }
+       spin_unlock(&lli->lli_heat_lock);
+}
+
+/*
+ * Apply the LU_HEAT_FLAG_* bits in @flags to @inode: LU_HEAT_FLAG_CLEAR
+ * zeroes every heat instance, and the per-file OFF flag is set to match
+ * LU_HEAT_FLAG_OFF, so a call without that bit switches heat back on.
+ * Always returns 0.
+ */
+static int ll_heat_set(struct inode *inode, __u64 flags)
+{
+       struct ll_inode_info *lli = ll_i2info(inode);
+       bool clear = !!(flags & LU_HEAT_FLAG_CLEAR);
+       bool off = !!(flags & LU_HEAT_FLAG_OFF);
+
+       spin_lock(&lli->lli_heat_lock);
+
+       if (clear)
+               obd_heat_clear(lli->lli_heat_instances, OBD_HEAT_COUNT);
+
+       if (!off)
+               lli->lli_heat_flags &= ~LU_HEAT_FLAG_OFF;
+       else
+               lli->lli_heat_flags |= LU_HEAT_FLAG_OFF;
+
+       spin_unlock(&lli->lli_heat_lock);
+
+       RETURN(0);
+}
+
 static long
 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -3621,6 +3692,37 @@ out_ladvise:
                RETURN(ll_ioctl_fssetxattr(inode, cmd, arg));
        case BLKSSZGET:
                RETURN(put_user(PAGE_SIZE, (int __user *)arg));
+       case LL_IOC_HEAT_GET: {
+               struct lu_heat uheat;
+               struct lu_heat *heat;
+               int size;
+
+               if (copy_from_user(&uheat, (void __user *)arg, sizeof(uheat)))
+                       RETURN(-EFAULT);
+
+               if (uheat.lh_count > OBD_HEAT_COUNT)
+                       uheat.lh_count = OBD_HEAT_COUNT;
+
+               size = offsetof(typeof(uheat), lh_heat[uheat.lh_count]);
+               OBD_ALLOC(heat, size);
+               if (heat == NULL)
+                       RETURN(-ENOMEM);
+
+               heat->lh_count = uheat.lh_count;
+               ll_heat_get(inode, heat);
+               rc = copy_to_user((char __user *)arg, heat, size);
+               OBD_FREE(heat, size);
+               RETURN(rc ? -EFAULT : 0);
+       }
+       case LL_IOC_HEAT_SET: {
+               __u64 flags;
+
+               if (copy_from_user(&flags, (void __user *)arg, sizeof(flags)))
+                       RETURN(-EFAULT);
+
+               rc = ll_heat_set(inode, flags);
+               RETURN(rc);
+       }
        default:
                RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
                                     (void __user *)arg));
index 974ab92..168f16e 100644 (file)
@@ -199,6 +199,11 @@ struct ll_inode_info {
                        /* for writepage() only to communicate to fsync */
                        int                     lli_async_rc;
 
+                       /* protect the file heat fields */
+                       spinlock_t                      lli_heat_lock;
+                       __u32                           lli_heat_flags;
+                       struct obd_heat_instance        lli_heat_instances[OBD_HEAT_COUNT];
+
                        /*
                         * Whenever a process try to read/write the file, the
                         * jobid of the process will be saved here, and it'll
@@ -456,7 +461,7 @@ enum stats_track_type {
 /*     LL_SBI_PIO          0x1000000    parallel IO support, introduced in
                                         2.10, abandoned */
 #define LL_SBI_TINY_WRITE   0x2000000 /* tiny write support */
-
+#define LL_SBI_FILE_HEAT    0x4000000 /* file heat support */
 #define LL_SBI_FLAGS {         \
        "nolck",        \
        "checksum",     \
@@ -484,6 +489,7 @@ enum stats_track_type {
        "file_secctx",  \
        "pio",          \
        "tiny_write",   \
+       "file_heat",    \
 }
 
 /* This is embedded into llite super-blocks to keep track of connect
@@ -572,8 +578,14 @@ struct ll_sb_info {
 
        struct kset               ll_kset;      /* sysfs object */
        struct completion         ll_kobj_unregister;
+
+       /* File heat */
+       unsigned int              ll_heat_decay_weight;
+       unsigned int              ll_heat_period_second;
 };
 
+#define SBI_DEFAULT_HEAT_DECAY_WEIGHT  ((80 * 256 + 50) / 100)
+#define SBI_DEFAULT_HEAT_PERIOD_SECOND (60)
 /*
  * per file-descriptor read-ahead data.
  */
@@ -731,6 +743,11 @@ static inline bool ll_sbi_has_tiny_write(struct ll_sb_info *sbi)
        return !!(sbi->ll_flags & LL_SBI_TINY_WRITE);
 }
 
+/* True when the LL_SBI_FILE_HEAT flag is set on this super block info */
+static inline bool ll_sbi_has_file_heat(struct ll_sb_info *sbi)
+{
+       return (sbi->ll_flags & LL_SBI_FILE_HEAT) != 0;
+}
+
 void ll_ras_enter(struct file *f);
 
 /* llite/lcommon_misc.c */
index 325cd8d..83d9b8c 100644 (file)
@@ -142,6 +142,9 @@ static struct ll_sb_info *ll_init_sbi(void)
        INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
        init_rwsem(&sbi->ll_squash.rsi_sem);
 
+       /* Per-filesystem file heat */
+       sbi->ll_heat_decay_weight = SBI_DEFAULT_HEAT_DECAY_WEIGHT;
+       sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
        RETURN(sbi);
 }
 
@@ -973,6 +976,9 @@ void ll_lli_init(struct ll_inode_info *lli)
                INIT_LIST_HEAD(&lli->lli_agl_list);
                lli->lli_agl_index = 0;
                lli->lli_async_rc = 0;
+               spin_lock_init(&lli->lli_heat_lock);
+               obd_heat_clear(lli->lli_heat_instances, OBD_HEAT_COUNT);
+               lli->lli_heat_flags = 0;
        }
        mutex_init(&lli->lli_layout_mutex);
        memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
index ac3cfdb..631b43e 100644 (file)
@@ -1098,6 +1098,109 @@ static ssize_t fast_read_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(fast_read);
 
+/* sysfs "file_heat" read: prints 1 when LL_SBI_FILE_HEAT is set, else 0 */
+static ssize_t file_heat_show(struct kobject *kobj,
+                             struct attribute *attr,
+                             char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned int enabled = (sbi->ll_flags & LL_SBI_FILE_HEAT) ? 1 : 0;
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", enabled);
+}
+
+/*
+ * sysfs "file_heat" write: parses a boolean and sets or clears
+ * LL_SBI_FILE_HEAT under ll_lock.  Returns @count, or the kstrtobool()
+ * error when the input cannot be parsed.
+ */
+static ssize_t file_heat_store(struct kobject *kobj,
+                              struct attribute *attr,
+                              const char *buffer,
+                              size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       bool enable;
+       int rc;
+
+       rc = kstrtobool(buffer, &enable);
+       if (rc != 0)
+               return rc;
+
+       spin_lock(&sbi->ll_lock);
+       if (!enable)
+               sbi->ll_flags &= ~LL_SBI_FILE_HEAT;
+       else
+               sbi->ll_flags |= LL_SBI_FILE_HEAT;
+       spin_unlock(&sbi->ll_lock);
+
+       return count;
+}
+LUSTRE_RW_ATTR(file_heat);
+
+/*
+ * sysfs "heat_decay_percentage" read: converts the stored weight (in units
+ * of 1/256) back to a percentage, rounded to the nearest integer.
+ */
+static ssize_t heat_decay_percentage_show(struct kobject *kobj,
+                                         struct attribute *attr,
+                                         char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned int percent = (sbi->ll_heat_decay_weight * 100 + 128) / 256;
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", percent);
+}
+
+/*
+ * sysfs "heat_decay_percentage" write: accepts a decimal percentage in
+ * [0, 100] and stores it as a weight in units of 1/256, rounded to nearest.
+ * Returns @count on success, the kstrtoul() error on unparsable input, or
+ * -ERANGE when the value is above 100.
+ */
+static ssize_t heat_decay_percentage_store(struct kobject *kobj,
+                                          struct attribute *attr,
+                                          const char *buffer,
+                                          size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned long val;
+       int rc;
+
+       rc = kstrtoul(buffer, 10, &val);
+       if (rc)
+               return rc;
+
+       /* val is unsigned, so only the upper bound needs checking */
+       if (val > 100)
+               return -ERANGE;
+
+       sbi->ll_heat_decay_weight = (val * 256 + 50) / 100;
+
+       return count;
+}
+LUSTRE_RW_ATTR(heat_decay_percentage);
+
+/* sysfs "heat_period_second" read: prints the heat period length in seconds */
+static ssize_t heat_period_second_show(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned int period = sbi->ll_heat_period_second;
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", period);
+}
+
+/*
+ * sysfs "heat_period_second" write: accepts a decimal, non-zero period
+ * length in seconds that fits in an unsigned int.  Returns @count on
+ * success, the kstrtouint() error on unparsable or oversized input, or
+ * -ERANGE for zero.
+ */
+static ssize_t heat_period_second_store(struct kobject *kobj,
+                                       struct attribute *attr,
+                                       const char *buffer,
+                                       size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned int val;
+       int rc;
+
+       /*
+        * Parse straight into the width of ll_heat_period_second: an
+        * unsigned long would be truncated on assignment, and a multiple of
+        * 2^32 would then store a period of 0, which the decay loop in
+        * obd_heat_decay() can never step past.
+        */
+       rc = kstrtouint(buffer, 10, &val);
+       if (rc)
+               return rc;
+
+       if (val == 0)
+               return -ERANGE;
+
+       sbi->ll_heat_period_second = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(heat_period_second);
+
 static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
 {
        struct super_block      *sb    = m->private;
@@ -1270,6 +1373,9 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_xattr_cache.attr,
        &lustre_attr_fast_read.attr,
        &lustre_attr_tiny_write.attr,
+       &lustre_attr_file_heat.attr,
+       &lustre_attr_heat_decay_percentage.attr,
+       &lustre_attr_heat_period_second.attr,
        NULL,
 };
 
index b160d35..0945292 100644 (file)
@@ -818,6 +818,90 @@ static void __exit obdclass_exit(void)
        EXIT;
 }
 
+/* Zero @count consecutive heat instances starting at @instance */
+void obd_heat_clear(struct obd_heat_instance *instance, int count)
+{
+       ENTRY;
+
+       memset(instance, 0, count * sizeof(instance[0]));
+
+       RETURN_EXIT;
+}
+EXPORT_SYMBOL(obd_heat_clear);
+
+/*
+ * The file heat is calculated for every time interval period I. The access
+ * frequency during each period is counted. The file heat is only recalculated
+ * at the end of a time period.  And a percentage of the former file heat is
+ * lost when recalculated. The recursion formula to calculate the heat of the
+ * file f is as follow:
+ *
+ * Hi+1(f) = (1-P)*Hi(f)+ P*Ci
+ *
+ * Where Hi is the heat value in the period between time points i*I and
+ * (i+1)*I; Ci is the access count in the period; the symbol P refers to the
+ * weight of Ci. The larger the value of P is, the more influence Ci has on
+ * the file heat.
+ *
+ * @instance:      heat counter to bring up to date
+ * @time_second:   current time in seconds
+ * @weight:        P in units of 1/256
+ * @period_second: I, the period length in seconds
+ *
+ * One recalculation is applied for each period boundary that lies strictly
+ * before @time_second, so the cost grows with the number of elapsed periods.
+ * A counter whose start time is later than @time_second is cleared; a
+ * counter that was never started (ohi_time_second == 0) is left untouched.
+ */
+void obd_heat_decay(struct obd_heat_instance *instance,  __u64 time_second,
+                   unsigned int weight, unsigned int period_second)
+{
+       u64 second;
+
+       ENTRY;
+
+       if (instance->ohi_time_second > time_second) {
+               obd_heat_clear(instance, 1);
+               RETURN_EXIT;
+       }
+
+       if (instance->ohi_time_second == 0)
+               RETURN_EXIT;
+
+       /* a zero-length period would never advance "second" in the loop */
+       if (period_second == 0)
+               RETURN_EXIT;
+
+       for (second = instance->ohi_time_second + period_second;
+            second < time_second;
+            second += period_second) {
+               instance->ohi_heat = instance->ohi_heat *
+                               (256 - weight) / 256 +
+                               instance->ohi_count * weight / 256;
+               instance->ohi_count = 0;
+               instance->ohi_time_second = second;
+       }
+       RETURN_EXIT;
+}
+EXPORT_SYMBOL(obd_heat_decay);
+
+/*
+ * Bring @instance up to date with obd_heat_decay() and return its heat.
+ * When the current period already has accesses recorded, the returned value
+ * is the heat that the next recalculation would produce; the instance's
+ * stored heat and count are not modified by that projection.
+ */
+__u64 obd_heat_get(struct obd_heat_instance *instance, unsigned int time_second,
+                  unsigned int weight, unsigned int period_second)
+{
+       __u64 heat;
+
+       ENTRY;
+
+       obd_heat_decay(instance, time_second, weight, period_second);
+
+       heat = instance->ohi_heat;
+       if (instance->ohi_count != 0)
+               heat = heat * (256 - weight) / 256 +
+                      instance->ohi_count * weight / 256;
+
+       RETURN(heat);
+}
+EXPORT_SYMBOL(obd_heat_get);
+
+/*
+ * Add @count to the current period of @instance after bringing it up to
+ * date with obd_heat_decay().  An instance that has not been started yet
+ * (ohi_time_second == 0) begins its first period at @time_second.
+ */
+void obd_heat_add(struct obd_heat_instance *instance,
+                 unsigned int time_second,  __u64 count,
+                 unsigned int weight, unsigned int period_second)
+{
+       ENTRY;
+
+       obd_heat_decay(instance, time_second, weight, period_second);
+
+       if (instance->ohi_time_second != 0) {
+               /* a period is already running: just accumulate */
+               instance->ohi_count += count;
+               RETURN_EXIT;
+       }
+
+       instance->ohi_time_second = time_second;
+       instance->ohi_heat = 0;
+       instance->ohi_count = count;
+       RETURN_EXIT;
+}
+EXPORT_SYMBOL(obd_heat_add);
+
 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
 MODULE_DESCRIPTION("Lustre Class Driver");
 MODULE_VERSION(LUSTRE_VERSION_STRING);
index 8d9a2fe..531b149 100755 (executable)
@@ -20409,6 +20409,177 @@ test_812() {
 }
 run_test 812 "do not drop reqs generated when imp is going to idle (LU-11951)"
 
+test_813() { # file heat: accumulation, decay across periods, per-file off/on, clear, mount-wide off
+       local file_heat_sav=$($LCTL get_param -n llite.*.file_heat 2>/dev/null)
+       [ -z "$file_heat_sav" ] && skip "no file heat support" # no such parameter on this client
+
+       local readsample
+       local writesample
+       local readbyte
+       local writebyte
+       local readsample1
+       local writesample1
+       local readbyte1
+       local writebyte1
+
+       local period_second=$($LCTL get_param -n llite.*.heat_period_second)
+       local decay_pct=$($LCTL get_param -n llite.*.heat_decay_percentage)
+
+       $LCTL set_param -n llite.*.file_heat=1
+       echo "Turn on file heat"
+       echo "Period second: $period_second, Decay percentage: $decay_pct"
+
+       echo "QQQQ" > $DIR/$tfile # workload: 3 writes and 4 reads of 5 bytes each ("QQQQ" + newline)
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+
+       local out=$($LFS heat_get $DIR/$tfile)
+
+       $LFS heat_get $DIR/$tfile # second call only to show the values in the test log
+       readsample=$(echo "$out" | grep 'readsample' | awk '{ print $2 }')
+       writesample=$(echo "$out" | grep 'writesample' | awk '{ print $2 }')
+       readbyte=$(echo "$out" | grep 'readbyte' | awk '{ print $2 }')
+       writebyte=$(echo "$out" | grep 'writebyte' | awk '{ print $2 }')
+
+       [ $readsample -le 4 ] || error "read sample ($readsample) is wrong" # upper bounds: 4 reads, 3 writes, 20 and 15 bytes
+       [ $writesample -le 3 ] || error "write sample ($writesample) is wrong"
+       [ $readbyte -le 20 ] || error "read bytes ($readbyte) is wrong"
+       [ $writebyte -le 15 ] || error "write bytes ($writebyte) is wrong"
+
+       sleep $((period_second + 3)) # wait longer than one heat period so the next query applies a decay step
+       echo "Sleep $((period_second + 3)) seconds..."
+       # The recursion formula to calculate the heat of the file f is as
+       # follow:
+       # Hi+1(f) = (1-P)*Hi(f)+ P*Ci
+       # Where Hi is the heat value in the period between time points i*I and
+       # (i+1)*I; Ci is the access count in the period; the symbol P refers
+       # to the weight of Ci.
+       out=$($LFS heat_get $DIR/$tfile)
+       $LFS heat_get $DIR/$tfile
+       readsample=$(echo "$out" | grep 'readsample' | awk '{ print $2 }')
+       writesample=$(echo "$out" | grep 'writesample' | awk '{ print $2 }')
+       readbyte=$(echo "$out" | grep 'readbyte' | awk '{ print $2 }')
+       writebyte=$(echo "$out" | grep 'writebyte' | awk '{ print $2 }')
+
+       [ $(bc <<< "$readsample <= 4 * $decay_pct / 100") -eq 1 ] || # previous heat was 0, so H1 <= P * C0
+               error "read sample ($readsample) is wrong"
+       [ $(bc <<< "$writesample <= 3 * $decay_pct / 100") -eq 1 ] ||
+               error "write sample ($writesample) is wrong"
+       [ $(bc <<< "$readbyte <= 20 * $decay_pct / 100") -eq 1 ] ||
+               error "read bytes ($readbyte) is wrong"
+       [ $(bc <<< "$writebyte <= 15 * $decay_pct / 100") -eq 1 ] ||
+               error "write bytes ($writebyte) is wrong"
+
+       echo "QQQQ" > $DIR/$tfile # same workload again, accounted in a later period
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+
+       sleep $((period_second + 3))
+       echo "Sleep $((period_second + 3)) seconds..."
+
+       out=$($LFS heat_get $DIR/$tfile)
+       $LFS heat_get $DIR/$tfile
+       readsample1=$(echo "$out" | grep 'readsample' | awk '{ print $2 }')
+       writesample1=$(echo "$out" | grep 'writesample' | awk '{ print $2 }')
+       readbyte1=$(echo "$out" | grep 'readbyte' | awk '{ print $2 }')
+       writebyte1=$(echo "$out" | grep 'writebyte' | awk '{ print $2 }')
+
+       [ $(bc <<< "$readsample1 <= ($readsample * (100 - $decay_pct) + \
+               4 * $decay_pct) / 100") -eq 1 ] ||
+               error "read sample ($readsample1) is wrong"
+       [ $(bc <<< "$writesample1 <= ($writesample * (100 - $decay_pct) + \
+               3 * $decay_pct) / 100") -eq 1 ] ||
+               error "write sample ($writesample1) is wrong"
+       [ $(bc <<< "$readbyte1 <= ($readbyte * (100 - $decay_pct) + \
+               20 * $decay_pct) / 100") -eq 1 ] ||
+               error "read bytes ($readbyte1) is wrong"
+       [ $(bc <<< "$writebyte1 <= ($writebyte * (100 - $decay_pct) + \
+               15 * $decay_pct) / 100") -eq 1 ] ||
+               error "write bytes ($writebyte1) is wrong"
+
+       echo "Turn off file heat for the file $DIR/$tfile"
+       $LFS heat_set -o $DIR/$tfile # NOTE(review): the zero checks below presumably rely on -o also clearing the counters; confirm in lfs.c
+
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+
+       out=$($LFS heat_get $DIR/$tfile)
+       $LFS heat_get $DIR/$tfile
+       readsample=$(echo "$out" | grep 'readsample' | awk '{ print $2 }')
+       writesample=$(echo "$out" | grep 'writesample' | awk '{ print $2 }')
+       readbyte=$(echo "$out" | grep 'readbyte' | awk '{ print $2 }')
+       writebyte=$(echo "$out" | grep 'writebyte' | awk '{ print $2 }')
+
+       [ $readsample -eq 0 ] || error "read sample ($readsample) is wrong" # I/O on a heat-off file must not be counted
+       [ $writesample -eq 0 ] || error "write sample ($writesample) is wrong"
+       [ $readbyte -eq 0 ] || error "read bytes ($readbyte) is wrong"
+       [ $writebyte -eq 0 ] || error "write bytes ($writebyte) is wrong"
+
+       echo "Trun on file heat for the file $DIR/$tfile"
+       $LFS heat_set -O $DIR/$tfile
+
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+
+       out=$($LFS heat_get $DIR/$tfile)
+       $LFS heat_get $DIR/$tfile
+       readsample=$(echo "$out" | grep 'readsample' | awk '{ print $2 }')
+       writesample=$(echo "$out" | grep 'writesample' | awk '{ print $2 }')
+       readbyte=$(echo "$out" | grep 'readbyte' | awk '{ print $2 }')
+       writebyte=$(echo "$out" | grep 'writebyte' | awk '{ print $2 }')
+
+       [ $readsample -gt 0 ] || error "read sample ($readsample) is wrong" # accounting resumes once heat is back on
+       [ $writesample -gt 0 ] || error "write sample ($writesample) is wrong"
+       [ $readbyte -gt 0 ] || error "read bytes ($readbyte) is wrong"
+       [ $writebyte -gt 0 ] || error "write bytes ($writebyte) is wrong"
+
+       $LFS heat_set -c $DIR/$tfile # zero the counters before disabling heat for the whole mount
+       $LCTL set_param -n llite.*.file_heat=0
+       echo "Turn off file heat support for the Lustre filesystem"
+
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       echo "QQQQ" > $DIR/$tfile
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+       cat $DIR/$tfile > /dev/null
+
+       out=$($LFS heat_get $DIR/$tfile)
+       $LFS heat_get $DIR/$tfile
+       readsample=$(echo "$out" | grep 'readsample' | awk '{ print $2 }')
+       writesample=$(echo "$out" | grep 'writesample' | awk '{ print $2 }')
+       readbyte=$(echo "$out" | grep 'readbyte' | awk '{ print $2 }')
+       writebyte=$(echo "$out" | grep 'writebyte' | awk '{ print $2 }')
+
+       [ $readsample -eq 0 ] || error "read sample ($readsample) is wrong" # nothing is counted while heat is off mount-wide
+       [ $writesample -eq 0 ] || error "write sample ($writesample) is wrong"
+       [ $readbyte -eq 0 ] || error "read bytes ($readbyte) is wrong"
+       [ $writebyte -eq 0 ] || error "write bytes ($writebyte) is wrong"
+
+       $LCTL set_param -n llite.*.file_heat=$file_heat_sav # restore the setting saved at the start
+       rm -f $DIR/$tfile
+}
+run_test 813 "File heat verfication"
+
 #
 # tests that do cleanup/setup should be run at the end
 #
index 4d7b56b..e795970 100644 (file)
@@ -104,7 +104,8 @@ liblustreapi_la_SOURCES = liblustreapi.c liblustreapi_hsm.c \
                          liblustreapi_lease.c liblustreapi_util.c \
                          liblustreapi_kernelconn.c liblustreapi_param.c \
                          liblustreapi_mirror.c \
-                         liblustreapi_ladvise.c liblustreapi_chlg.c
+                         liblustreapi_ladvise.c liblustreapi_chlg.c \
+                         liblustreapi_heat.c
 liblustreapi_la_LDFLAGS = $(LIBREADLINE) -version-info 1:0:0 \
                          -Wl,--version-script=liblustreapi.map
 liblustreapi_la_LIBADD = $(top_builddir)/libcfs/libcfs/libcfs.la
index b16af99..cc625b6 100644 (file)
@@ -119,6 +119,8 @@ static int lfs_swap_layouts(int argc, char **argv);
 static int lfs_mv(int argc, char **argv);
 static int lfs_ladvise(int argc, char **argv);
 static int lfs_getsom(int argc, char **argv);
+static int lfs_heat_get(int argc, char **argv);
+static int lfs_heat_set(int argc, char **argv);
 static int lfs_mirror(int argc, char **argv);
 static int lfs_mirror_list_commands(int argc, char **argv);
 static int lfs_list_commands(int argc, char **argv);
@@ -605,6 +607,15 @@ command_t cmdlist[] = {
         "\t-s: Only show the size value of the SOM data for a given file\n"
         "\t-b: Only show the blocks value of the SOM data for a given file\n"
         "\t-f: Only show the flags value of the SOM data for a given file\n"},
+       {"heat_get", lfs_heat_get, 0,
+        "To get heat of files.\n"
+        "usage: heat_get <file> ...\n"},
+       {"heat_set", lfs_heat_set, 0,
+        "To set heat flags of files.\n"
+        "usage: heat_set [--clear|-c] [--off|-o] [--on|-O] <file> ...\n"
+        "\t--clear|-c: Clear file heat for given files\n"
+        "\t--off|-o:   Turn off file heat for given files\n"
+        "\t--on|-O:    Turn on file heat for given files\n"},
        {"help", Parser_help, 0, "help"},
        {"exit", Parser_quit, 0, "quit"},
        {"quit", Parser_quit, 0, "quit"},
@@ -7897,6 +7908,131 @@ next:
        return rc;
 }
 
+
+static const char *const heat_names[] = LU_HEAT_NAMES;
+
+/**
+ * lfs heat_get: print the heat flags and heat counters of each given file.
+ *
+ * Every argument after the command name is treated as a file path. A failure
+ * on one path is reported on stderr and processing continues with the next
+ * path; the first failure determines the return value.
+ *
+ * \param argc     Number of entries in \a argv.
+ * \param argv     Command name followed by one or more file paths.
+ *
+ * \retval 0 if every file was handled successfully.
+ * \retval CMD_HELP if no file was given.
+ * \retval -ENOMEM if the heat buffer cannot be allocated.
+ * \retval negative error code of the first file that failed.
+ */
+static int lfs_heat_get(int argc, char **argv)
+{
+       const size_t     nr_names = sizeof(heat_names) / sizeof(heat_names[0]);
+       struct lu_heat  *heat;
+       int              rc = 0, rc2;
+       char            *path;
+       int              fd;
+       size_t           count;
+       size_t           i;
+
+       if (argc <= 1)
+               return CMD_HELP;
+
+       /* One header plus room for OBD_HEAT_COUNT heat values, zero-filled. */
+       heat = calloc(1, sizeof(*heat) + sizeof(__u64) * OBD_HEAT_COUNT);
+       if (!heat) {
+               fprintf(stderr, "%s: memory allocation failed\n", argv[0]);
+               return -ENOMEM;
+       }
+
+       optind = 1;
+       while (optind < argc) {
+               path = argv[optind++];
+
+               fd = open(path, O_RDONLY);
+               if (fd < 0) {
+                       /* Save errno first: fprintf() may overwrite it. */
+                       rc2 = -errno;
+                       fprintf(stderr, "%s: cannot open file '%s': %s\n",
+                               argv[0], path, strerror(-rc2));
+                       goto next;
+               }
+
+               /* Tell the callee how many entries the buffer can hold. */
+               heat->lh_count = OBD_HEAT_COUNT;
+               rc2 = llapi_heat_get(fd, heat);
+               close(fd);
+               if (rc2 < 0) {
+                       /*
+                        * errno may have been changed by close(); report the
+                        * error code returned by llapi_heat_get() instead.
+                        */
+                       fprintf(stderr, "%s: cannot get heat of file '%s'"
+                               ": %s\n", argv[0], path, strerror(-rc2));
+                       goto next;
+               }
+
+               /* Never index past the allocated buffer or the name table. */
+               count = heat->lh_count;
+               if (count > OBD_HEAT_COUNT)
+                       count = OBD_HEAT_COUNT;
+               if (count > nr_names)
+                       count = nr_names;
+
+               printf("flags: %x\n", heat->lh_flags);
+               for (i = 0; i < count; i++)
+                       printf("%s: %llu\n", heat_names[i],
+                              (unsigned long long)heat->lh_heat[i]);
+next:
+               /* Remember only the first failure, keep processing files. */
+               if (rc == 0 && rc2 < 0)
+                       rc = rc2;
+       }
+
+       free(heat);
+       return rc;
+}
+
+/**
+ * lfs heat_set: apply heat flags to each given file.
+ *
+ * Options: --clear|-c sets LU_HEAT_FLAG_CLEAR, --off|-o sets both
+ * LU_HEAT_FLAG_CLEAR and LU_HEAT_FLAG_OFF, --on|-O removes LU_HEAT_FLAG_OFF
+ * from the flags accumulated so far. The resulting flag word is passed to
+ * llapi_heat_set() for every file. A failure on one file is reported on
+ * stderr and processing continues; the first failure determines the
+ * return value.
+ *
+ * \param argc     Number of entries in \a argv.
+ * \param argv     Command name, options, then one or more file paths.
+ *
+ * \retval 0 if every file was handled successfully.
+ * \retval CMD_HELP on bad usage (no arguments, bad option, no file).
+ * \retval negative error code of the first file that failed.
+ */
+static int lfs_heat_set(int argc, char **argv)
+{
+       struct option    long_opts[] = {
+               {"clear", no_argument, 0, 'c'},
+               {"off", no_argument, 0, 'o'},
+               {"on", no_argument, 0, 'O'},
+               {0, 0, 0, 0}
+       };
+       char             short_opts[] = "coO";
+       int              rc = 0, rc2;
+       char            *path;
+       int              fd;
+       __u64            flags = 0;
+       int              c;
+
+       if (argc <= 1)
+               return CMD_HELP;
+
+       optind = 0;
+       while ((c = getopt_long(argc, argv, short_opts,
+                               long_opts, NULL)) != -1) {
+               switch (c) {
+               case 'c':
+                       flags |= LU_HEAT_FLAG_CLEAR;
+                       break;
+               case 'o':
+                       flags |= LU_HEAT_FLAG_CLEAR;
+                       flags |= LU_HEAT_FLAG_OFF;
+                       break;
+               case 'O':
+                       flags &= ~LU_HEAT_FLAG_OFF;
+                       break;
+               case '?':
+                       return CMD_HELP;
+               default:
+                       fprintf(stderr, "%s: option '%s' unrecognized\n",
+                               argv[0], argv[optind - 1]);
+                       return CMD_HELP;
+               }
+       }
+
+       if (argc <= optind) {
+               fprintf(stderr, "%s: please give one or more file names\n",
+                       argv[0]);
+               return CMD_HELP;
+       }
+
+       while (optind < argc) {
+               path = argv[optind++];
+
+               fd = open(path, O_RDONLY);
+               if (fd < 0) {
+                       /* Save errno first: fprintf() may overwrite it. */
+                       rc2 = -errno;
+                       fprintf(stderr, "%s: cannot open file '%s': %s\n",
+                               argv[0], path, strerror(-rc2));
+                       goto next;
+               }
+
+               rc2 = llapi_heat_set(fd, flags);
+               close(fd);
+               if (rc2 < 0) {
+                       /*
+                        * errno may have been changed by close(); report the
+                        * error code returned by llapi_heat_set() instead.
+                        */
+                       fprintf(stderr, "%s: cannot setflags heat of file '%s'"
+                               ": %s\n", argv[0], path, strerror(-rc2));
+                       goto next;
+               }
+next:
+               /* Remember only the first failure, keep processing files. */
+               if (rc == 0 && rc2 < 0)
+                       rc = rc2;
+       }
+       return rc;
+}
+
 /** The input string contains a comma delimited list of component ids and
  * ranges, for example "1,2-4,7".
  */
diff --git a/lustre/utils/liblustreapi_heat.c b/lustre/utils/liblustreapi_heat.c
new file mode 100644 (file)
index 0000000..b703444
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * (C) Copyright (c) 2018, DataDirect Networks Inc, all rights reserved.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the GNU Lesser General Public License
+ * LGPL version 2.1 or (at your discretion) any later version.
+ * LGPL version 2.1 accompanies this distribution, and is available at
+ * http://www.gnu.org/licenses/lgpl-2.1.html
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * LGPL HEADER END
+ */
+/*
+ * lustre/utils/liblustreapi_heat.c
+ *
+ * lustreapi library for heat
+ *
+ * Author: Li Xi <lixi@ddn.com>
+ */
+
+#include <lustre/lustreapi.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+
+#include <libcfs/util/ioctl.h>
+#include <lustre/lustreapi.h>
+#include <linux/lustre/lustre_ioctl.h>
+#include "lustreapi_internal.h"
+
+/*
+ * Get heat of a file
+ *
+ * Issues the LL_IOC_HEAT_GET ioctl on \a fd with \a heat as its argument.
+ *
+ * \param fd       Open file descriptor of the file to get heat from.
+ * \param heat     Buffer to save heat.
+ *
+ * \retval 0 on success.
+ * \retval -errno on failure.
+ */
+int llapi_heat_get(int fd, struct lu_heat *heat)
+{
+       int rc;
+
+       rc = ioctl(fd, LL_IOC_HEAT_GET, heat);
+       if (rc < 0) {
+               /* Capture errno before llapi_error() can overwrite it. */
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc, "cannot get heat");
+               return rc;
+       }
+       return 0;
+}
+
+/*
+ * Set heat flags of a file
+ *
+ * Issues the LL_IOC_HEAT_SET ioctl on \a fd, passing a pointer to \a flags.
+ *
+ * \param fd       Open file descriptor of the file to set heat flags on.
+ * \param flags    Heat flags to apply to the file.
+ *
+ * \retval 0 on success.
+ * \retval -errno on failure.
+ */
+int llapi_heat_set(int fd, __u64 flags)
+{
+       int rc;
+
+       rc = ioctl(fd, LL_IOC_HEAT_SET, &flags);
+       if (rc < 0) {
+               /* Capture errno before llapi_error() can overwrite it. */
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc, "cannot set heat flags");
+               return rc;
+       }
+       return 0;
+}