+ pgoff_t window_pages;
+
+ window_pages = min(ras->ras_window_pages + ras->ras_rpc_pages,
+ ra->ra_max_pages_per_file);
+ if (window_pages < ras->ras_rpc_pages)
+ ras->ras_window_pages = window_pages;
+ else
+ ras->ras_window_pages = ras_align(ras, window_pages);
+ }
+}
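
The growth step above widens the read-ahead window by one RPC's worth of
pages per call, caps it at ra_max_pages_per_file, and keeps sub-RPC windows
unaligned while aligning anything larger to an RPC boundary. A minimal
standalone model of that arithmetic (hypothetical names; it assumes
ras_align() rounds down to an RPC-sized boundary, which may differ from the
real helper):

#include <stdio.h>

/* assumed stand-in for ras_align(): round down to an RPC boundary */
static unsigned long model_align(unsigned long pages, unsigned long rpc_pages)
{
        return pages - pages % rpc_pages;
}

static unsigned long model_grow(unsigned long window, unsigned long rpc_pages,
                                unsigned long max_pages)
{
        unsigned long grown = window + rpc_pages;

        if (grown > max_pages)
                grown = max_pages;
        /* keep sub-RPC windows as-is; align anything larger */
        return grown < rpc_pages ? grown : model_align(grown, rpc_pages);
}

int main(void)
{
        unsigned long w = 0;
        int i;

        /* e.g. 256-page (1MB) RPCs, 2048-page (8MB) per-file cap */
        for (i = 0; i < 8; i++) {
                w = model_grow(w, 256, 2048);
                printf("step %d: window = %lu pages\n", i, w);
        }
        return 0;
}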
+
+/**
+ * Seeks within 8 pages of the last read end are considered sequential reads for now.
+ */
+static inline bool is_loose_seq_read(struct ll_readahead_state *ras, loff_t pos)
+{
+ return pos_in_window(pos, ras->ras_last_read_end_bytes,
+ 8UL << PAGE_SHIFT, 8UL << PAGE_SHIFT);
+}
+
+static inline bool is_loose_mmap_read(struct ll_sb_info *sbi,
+ struct ll_readahead_state *ras,
+ unsigned long pos)
+{
+ unsigned long range_pages = sbi->ll_ra_info.ra_range_pages;
+
+ return pos_in_window(pos, ras->ras_last_read_end_bytes,
+ range_pages << PAGE_SHIFT,
+ range_pages << PAGE_SHIFT);
+}
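
Both helpers are thin wrappers around pos_in_window(). A sketch of the
assumed semantics (true when pos lies within [base - before, base + after];
the real Lustre helper may guard unsigned underflow differently), together
with the 8-page slack used by is_loose_seq_read():

#include <stdbool.h>
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12     /* 4KB pages, an assumption */

static bool model_pos_in_window(unsigned long pos, unsigned long base,
                                unsigned long before, unsigned long after)
{
        unsigned long lo = base >= before ? base - before : 0;

        return pos >= lo && pos <= base + after;
}

int main(void)
{
        unsigned long last_end = 1048576;               /* read ended at 1MB */
        unsigned long slack = 8UL << MODEL_PAGE_SHIFT;  /* 8 pages = 32KB */

        /* a read starting exactly 32KB past the last end is still
         * "loose sequential" */
        printf("%d\n", model_pos_in_window(last_end + slack, last_end,
                                           slack, slack));     /* prints 1 */
        /* one byte further and it is not */
        printf("%d\n", model_pos_in_window(last_end + slack + 1, last_end,
                                           slack, slack));     /* prints 0 */
        return 0;
}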
+
+/**
+ * We have observed slow mmap read performance for some
+ * applications. The problem is that the access pattern is neither
+ * sequential nor strided: reads may be adjacent within a small
+ * range and then seek to a random position.
+ *
+ * So the pattern could be something like this:
+ *
+ * [1M data] [hole] [0.5M data] [hole] [0.7M data] [1M data]
+ *
+ * Every time an application reads mmap data, it may not only
+ * read a single 4KB page, but also a cluster of nearby pages in
+ * a range (e.g. 1MB) of the first page after a cache miss.
+ *
+ * The readahead engine is modified to track the range size of
+ * a cluster of mmap reads, so that after a seek and/or cache miss,
+ * the range size is used to efficiently prefetch multiple pages
+ * in a single RPC rather than many small RPCs.
+ */
+static void ras_detect_cluster_range(struct ll_readahead_state *ras,
+ struct ll_sb_info *sbi,
+ unsigned long pos, unsigned long count)
+{
+ pgoff_t last_pages, pages;
+ pgoff_t end_idx = (pos + count - 1) >> PAGE_SHIFT;
+
+ last_pages = ras->ras_range_max_end_idx -
+ ras->ras_range_min_start_idx + 1;
+ /* First time through here */
+ if (!ras->ras_range_max_end_idx)
+ goto out;
+
+ /* Random or Stride read */
+ if (!is_loose_mmap_read(sbi, ras, pos))
+ goto out;
+
+ ras->ras_range_requests++;
+ if (ras->ras_range_max_end_idx < end_idx)
+ ras->ras_range_max_end_idx = end_idx;
+
+ if (ras->ras_range_min_start_idx > (pos >> PAGE_SHIFT))
+ ras->ras_range_min_start_idx = pos >> PAGE_SHIFT;
+
+ /* If the cluster grows beyond ra_range_pages, consider it as
+ * random or stride and reset below */
+ pages = ras->ras_range_max_end_idx -
+ ras->ras_range_min_start_idx + 1;
+ if (pages <= sbi->ll_ra_info.ra_range_pages)
+ return;
+out:
+ ras->ras_last_range_pages = last_pages;
+ ras->ras_range_requests = 0;
+ ras->ras_range_min_start_idx = pos >> PAGE_SHIFT;
+ ras->ras_range_max_end_idx = end_idx;
+}
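
To make the bookkeeping above concrete, here is a worked trace under assumed
settings (4KB pages, ra_range_pages = 256, i.e. a 1MB cluster window), with
ras_last_read_end_bytes maintained by the caller as in
ras_detect_read_pattern():

read(pos=0,     count=16KB) -> first call: cluster = pages [0, 3]
read(pos=40KB,  count=8KB)  -> loose mmap read: cluster = [0, 11],
                               12 pages <= 256, keep accumulating
read(pos=512KB, count=4KB)  -> still within 1MB of the last end:
                               cluster = [0, 128], 129 pages
read(pos=100MB, count=4KB)  -> distant seek: ras_last_range_pages records
                               the 129-page cluster and a new cluster
                               restarts at page 25600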
+
+static void ras_detect_read_pattern(struct ll_readahead_state *ras,
+ struct ll_sb_info *sbi,
+ loff_t pos, size_t count, bool mmap)
+{
+ bool stride_detect = false;
+ pgoff_t index = pos >> PAGE_SHIFT;
+
+ /*
+ * Reset the read-ahead window in two cases. First, when the app
+ * seeks or reads to some other part of the file. Second, if we
+ * get a read-ahead miss that we think we've previously issued.
+ * This can be a symptom of there being so many read-ahead pages
+ * that the VM is reclaiming them before we get to them.
+ */
+ if (!is_loose_seq_read(ras, pos)) {
+ /* Check whether it is in stride I/O mode */
+ if (!read_in_stride_window(ras, pos, count)) {
+ if (ras->ras_consecutive_stride_requests == 0)
+ ras_init_stride_detector(ras, pos, count);
+ else
+ ras_stride_reset(ras);
+ ras->ras_consecutive_bytes = 0;
+ ras_reset(ras, index);
+ } else {
+ ras->ras_consecutive_bytes = 0;
+ ras->ras_consecutive_requests = 0;
+ if (++ras->ras_consecutive_stride_requests > 1)
+ stride_detect = true;
+ RAS_CDEBUG(ras);
+ }
+ ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
+ } else if (stride_io_mode(ras)) {
+ /*
+ * If this is a contiguous read but we are currently in stride
+ * I/O mode, check whether the stride step is still valid; if
+ * not, reset the stride readahead window to zero.
+ */
+ if (!read_in_stride_window(ras, pos, count)) {
+ ras_stride_reset(ras);
+ ras->ras_window_pages = 0;
+ ras->ras_next_readahead_idx = index;
+ }
+ }
+
+ ras->ras_consecutive_bytes += count;
+ if (mmap) {
+ pgoff_t idx = ras->ras_consecutive_bytes >> PAGE_SHIFT;
+ unsigned long ra_range_pages =
+ max_t(unsigned long, RA_MIN_MMAP_RANGE_PAGES,
+ sbi->ll_ra_info.ra_range_pages);
+
+ if ((idx >= ra_range_pages &&
+ idx % ra_range_pages == 0) || stride_detect)
+ ras->ras_need_increase_window = true;
+ } else if (ras->ras_consecutive_requests > 1 || stride_detect) {
+ ras->ras_need_increase_window = true;
+ }
+
+ ras->ras_last_read_end_bytes = pos + count - 1;
+}
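
Note how the mmap branch above throttles growth: rather than growing on
every request, the window is bumped once per ra_range_pages worth of
consecutive bytes. A minimal standalone restatement of that condition
(hypothetical names; MODEL_MIN_MMAP_PAGES stands in for
RA_MIN_MMAP_RANGE_PAGES, whose value is an assumption here):

#include <stdbool.h>

#define MODEL_PAGE_SHIFT     12
#define MODEL_MIN_MMAP_PAGES 16UL       /* assumed floor */

static bool model_need_increase(unsigned long consecutive_bytes,
                                unsigned long ra_range_pages,
                                bool stride_detect)
{
        unsigned long idx = consecutive_bytes >> MODEL_PAGE_SHIFT;
        unsigned long range = ra_range_pages > MODEL_MIN_MMAP_PAGES ?
                              ra_range_pages : MODEL_MIN_MMAP_PAGES;

        /* grow once per full range of consecutive bytes, or when a
         * stride pattern has just been detected */
        return (idx >= range && idx % range == 0) || stride_detect;
}

With ra_range_pages = 256 (1MB at 4KB pages), the window grows after each
full 1MB of consecutive mmap reads, keeping growth proportional to the
observed cluster size rather than to the request count.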
+
+void ll_ras_enter(struct file *f, loff_t pos, size_t count)
+{
+ struct ll_file_data *fd = f->private_data;
+ struct ll_readahead_state *ras = &fd->fd_ras;
+ struct inode *inode = file_inode(f);
+ unsigned long index = pos >> PAGE_SHIFT;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+ spin_lock(&ras->ras_lock);
+ ras->ras_requests++;
+ ras->ras_consecutive_requests++;
+ ras->ras_need_increase_window = false;
+ ras->ras_no_miss_check = false;
+ /*
+ * On the second access to a file smaller than the tunable
+ * ra_max_read_ahead_whole_pages, trigger RA on all pages in the
+ * file, up to ra_max_pages_per_file. This is a best-effort
+ * optimization and only occurs once per open file; normal RA
+ * behavior is restored for subsequent IO.
+ */
+ if (ras->ras_requests >= 2) {
+ __u64 kms_pages;
+ struct ll_ra_info *ra = &sbi->ll_ra_info;