* this guarantee, so we need to check that the lock
* matched in ll_file_readv() also covers this page */
__u64 offset = ((loff_t)page->index) << CFS_PAGE_SHIFT;
- if (!obd_get_lock(exp, ll_i2info(inode)->lli_smd,
+ if (!obd_get_lock(exp, ll_i2info(inode)->lli_smd,
&llap->llap_cookie, OBD_BRW_READ,
offset, offset + CFS_PAGE_SIZE - 1,
lockh, flags))
static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
+/* ra_io_arg is filled at the beginning of ll_readahead, under
+ * ras_lock; the subsequent ll_read_ahead_pages then reads RA
+ * pages according to this argument. All items in this structure
+ * are counted by page index.
+ */
+struct ra_io_arg {
+        unsigned long ria_start;  /* start offset of read-ahead */
+        unsigned long ria_end;    /* end offset of read-ahead */
+        /* If a stride read pattern is detected, ria_stoff is the offset
+         * where the stride read started. Note: for normal read-ahead it
+         * is meaningless and will not be accessed. */
+        pgoff_t ria_stoff;
+        /* ria_length and ria_pages are the stride length and the number
+         * of pages per stride in stride I/O mode; ria_pages != 0 is also
+         * how the read-ahead code tells stride I/O from a normal read. */
+        unsigned long ria_length;
+        unsigned long ria_pages;
+};
+
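For orientation (this helper is not part of the patch; its name and standalone form are illustrative only), a minimal sketch of how a page index can be tested against an ra_io_arg, assuming the index is at or past ria_stoff in the stride case:

/* Hypothetical helper: decide whether page index 'idx' lies in the
 * window described by 'ria'.  Normal read-ahead (ria_pages == 0) is a
 * plain range check; stride I/O additionally requires the index to
 * fall inside a stride data chunk. */
static int ria_page_in_window(const struct ra_io_arg *ria, unsigned long idx)
{
        unsigned long off;

        if (idx < ria->ria_start || idx > ria->ria_end)
                return 0;
        if (ria->ria_pages == 0)
                return 1;       /* normal read-ahead: range check only */
        /* offset of idx within its stride, counted from ria_stoff */
        off = (idx - ria->ria_stoff) % ria->ria_length;
        return off < ria->ria_pages;
}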
/* WARNING: This algorithm is used to reduce the contention on
* sbi->ll_lock. It should work well if the ra_max_pages is much
* greater than the single file's read-ahead window.
 * ll_ra_count_get at exactly the same time. All of them will get a zero ra
* window, although the global window is 100M. -jay
*/
-static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
+static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
+                                     struct ra_io_arg *ria,
+                                     unsigned long len)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
unsigned long ret = 0;
 * otherwise it will form small read RPCs (< 1M), which hurt server
 * performance a lot.
*/
+        /* bail out early if read-ahead is already over the global limit,
+         * so the unsigned subtraction below cannot underflow */
+        if (ra->ra_max_pages < atomic_read(&ra->ra_cur_pages))
+                GOTO(out, ret = 0);
+
ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), len);
- if ((int)ret < min((unsigned long)PTLRPC_MAX_BRW_PAGES, len))
+ if (ret < min((unsigned long)PTLRPC_MAX_BRW_PAGES, len))
GOTO(out, ret = 0);
+        if (ria->ria_pages == 0)
+                /* the count needs to be 1M-aligned again after being
+                 * trimmed by ra_max_pages */
+                if (ret >= ((ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES))
+                        ret -= (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES;
+
if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
atomic_sub(ret, &ra->ra_cur_pages);
ret = 0;
}
+
out:
RETURN(ret);
}
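The alignment trim above is easiest to see with concrete numbers. Below is a standalone sketch, assuming 4 KB pages so that PTLRPC_MAX_BRW_PAGES is 256 pages (one 1 MB RPC); the function and macro names are illustrative, not from this patch:

#include <stdio.h>

#define MAX_BRW_PAGES 256UL   /* stands in for PTLRPC_MAX_BRW_PAGES */

/* Mirror of the trim in ll_ra_count_get: drop the tail pages so the
 * reserved count ends on an RPC (1 MB) boundary. */
static unsigned long trim_to_rpc_boundary(unsigned long ria_start,
                                          unsigned long ret)
{
        unsigned long tail = (ria_start + ret) % MAX_BRW_PAGES;

        if (ret >= tail)
                ret -= tail;
        return ret;
}

int main(void)
{
        /* start at page 100 with 300 pages granted: (100 + 300) % 256
         * == 144 tail pages are dropped, leaving 156, so the last RPC
         * ends exactly on the 1 MB boundary at page index 256. */
        printf("%lu\n", trim_to_rpc_boundary(100, 300)); /* prints 156 */
        return 0;
}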
return rc;
}
-/* ra_io_arg will be filled in the beginning of ll_readahead with
- * ras_lock, then the following ll_read_ahead_pages will read RA
- * pages according to this arg, all the items in this structure are
- * counted by page index.
- */
-struct ra_io_arg {
- unsigned long ria_start; /* start offset of read-ahead*/
- unsigned long ria_end; /* end offset of read-ahead*/
- /* If stride read pattern is detected, ria_stoff means where
- * stride read is started. Note: for normal read-ahead, the
- * value here is meaningless, and also it will not be accessed*/
- pgoff_t ria_stoff;
- /* ria_length and ria_pages are the length and pages length in the
- * stride I/O mode. And they will also be used to check whether
- * it is stride I/O read-ahead in the read-ahead pages*/
- unsigned long ria_length;
- unsigned long ria_pages;
-};
-
#define RIA_DEBUG(ria) \
CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
/**
- * Current readahead process
+ * Current readahead process
* read_syscall
* |
* ll_file_readv (init ll_readahead_state for the open file)
* |---> ll_readpage (read page)
* | |
* | |
- * | ras_update (update read-ahead window according to read pattern)
- * | |
+ * | ras_update (update read-ahead window according to read pattern)
+ * | |
* | |
* |--- ll_readahead (read_ahead pages)
*
*
* During this process, ras_update controls how many ahead pages it should
- * read by adjusting read-ahead window(RA window).The window is represented
+ * read by adjusting the read-ahead window (RA window). The window is represented
 * by the following three variables (all these values are counted in pages):
- *
- * 1. ras_window_start: start offset of the read-ahead window. It is
+ *
+ * 1. ras_window_start: start offset of the read-ahead window. It is
* initialized as the read offset, then as pages
- * are being read, it will be set as the last
+ * are being read, it will be set as the last
 * page (Note: it is 1M aligned, so with 4 KB pages it actually
- * is last_page_index & ~index & (~(256 - 1));
- *
+ * is last_page_index & ~(256 - 1));
+ *
 * 2. ras_window_len: length of the read-ahead window. The read-ahead window
 * length is decided by the following factors:
- *
+ *
 * a. It is at least the current read syscall length.
 * b. If contiguous read is detected (Note: it is syscall
 * contiguous, instead of page-read contiguous) the
* time.
 * c. If a stride read pattern is detected, the read-ahead
 * window will also be increased by 1M, but following the stride pattern.
- * stride pattern is defined by ras_stride_length,
- * ras_stride_pages and ras_stride_gap. (see
+ * The stride pattern is defined by ras_stride_length,
+ * ras_stride_pages and ras_stride_gap (see the
 * ll_readahead_states comments).
*
 * 3. ras_next_readahead: current offset in the read-ahead window, i.e. where
 * the next ll_readahead will start.
- *
- *
+ *
+ *
 * Cache miss: if memory load is very high, the kernel begins to evict pages from the cache,
 * including read-ahead pages; once we find a read-ahead page being evicted before
- * it is "really" accessed, it will reset the read-ahead window to the current read extent
+ * it is "really" accessed, we reset the read-ahead window to the current read extent,
 * i.e. from the current page to the end of this read.
*
 * The in-flight read-ahead amount is controlled by 2 variables (read-ahead rate):
 * ra_max_pages: the maximum number of in-flight read-ahead pages on the client.
 * ra_max_pages_per_file: the maximum number of in-flight read-ahead pages per file.
**/
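To make point 1 above concrete, here is a minimal sketch (the helper name is assumed, not the patch's code) of the 1 MB alignment of ras_window_start, again assuming 4 KB pages so that 256 pages equal 1 MB:

/* Round the last read page down to a 256-page (1 MB) boundary; this is
 * the "last_page_index & ~(256 - 1)" alignment described above. */
static unsigned long ra_window_start_aligned(unsigned long last_page_index)
{
        return last_page_index & ~(256UL - 1);
}
/* e.g. last_page_index == 300 gives 256, the start of the second 1 MB chunk. */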
-
-
-
static int ll_readahead(struct ll_readahead_state *ras,
struct obd_export *exp, struct address_space *mapping,
struct obd_io_group *oig, int flags)
if (len == 0)
RETURN(0);
- reserved = ll_ra_count_get(ll_i2sbi(inode), len);
+ reserved = ll_ra_count_get(ll_i2sbi(inode), &ria, len);
if (reserved < len)
ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
else
ras->ras_window_len = min(ras->ras_window_len +
(unsigned long)step,
- ra->ra_max_pages);
+ ra->ra_max_pages_per_file);
}
static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
GOTO(out, rc = PTR_ERR(llap));
}
- if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages_per_file)
+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages_per_file &&
+ ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
llap->llap_defer_uptodate);
if (llap->llap_defer_uptodate) {
/* This is the callpath if we got the page from a readahead */
llap->llap_ra_used = 1;
- rc = ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
- fd->fd_flags);
+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages_per_file &&
+ ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
+ rc = ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
+ fd->fd_flags);
if (rc > 0)
obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd,
NULL, oig);
LL_CDEBUG_PAGE(D_PAGE, page, "queued readpage\n");
/* We have just requested the actual page we want, see if we can tack
* on some readahead to that page's RPC before it is sent. */
- if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages_per_file)
+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages_per_file &&
+ ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
fd->fd_flags);
rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
-
out:
if (rc)
unlock_page(page);