typedef __u16 __le16;
typedef __u32 __le32;
#endif
-
+
/*
struct lustre_intent_data {
__u64 it_lock_handle[2];
struct ll_dir_entry {
/* number of inode, referenced by this entry */
- __le32 lde_inode;
+ __le32 lde_inode;
/* total record length, multiple of LL_DIR_PAD */
- __le16 lde_rec_len;
+ __le16 lde_rec_len;
/* length of name */
- __u8 lde_name_len;
+ __u8 lde_name_len;
/* file type: regular, directory, device, etc. */
- __u8 lde_file_type;
+ __u8 lde_file_type;
/* name. NOT NUL-terminated */
- char lde_name[LL_DIR_NAME_LEN];
+ char lde_name[LL_DIR_NAME_LEN];
};
struct ll_dentry_data {
* dir statahead.
*/
pid_t lli_opendir_pid;
- /*
+ /*
* since parent and child threads can share the same @file struct,
* "opendir_key" is the token used at dir close for the case where the
* parent exits before the child -- then this thread is the one that
* should clean up the dir readahead. */
[RA_STAT_EOF] = "read-ahead to EOF", \
[RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue", \
[RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",\
-}
+}
struct ll_ra_info {
atomic_t ra_cur_pages;
unsigned long ra_max_pages;
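+ /* cap on a single file's read-ahead window; ra_max_pages above
+  * limits the read-ahead pages used across the whole filesystem */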
+ unsigned long ra_max_pages_per_file;
unsigned long ra_max_read_ahead_whole_pages;
};
atomic_t llpd_sample_count;
unsigned long llpd_reblnc_count;
/* the pages in this list shouldn't be over this number */
- unsigned long llpd_budget;
+ unsigned long llpd_budget;
int llpd_cpu;
/* which page the pglist data is in */
- struct page *llpd_page;
+ struct page *llpd_page;
/* stats */
unsigned long llpd_hit;
struct ll_sb_info {
struct list_head ll_list;
- /* this protects pglist(only ll_async_page_max) and ra_info.
+ /* this protects pglist (only ll_async_page_max) and ra_info.
* It isn't safe to grab from interrupt contexts. */
spinlock_t ll_lock;
spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */
#define LL_PGLIST_DATA(sbi) LL_PGLIST_DATA_CPU(sbi, smp_processor_id())
static inline struct ll_pglist_data *ll_pglist_cpu_lock(
- struct ll_sb_info *sbi,
+ struct ll_sb_info *sbi,
int cpu)
{
spin_lock(&sbi->ll_pglist[cpu]->llpd_lock);
return sbi->ll_pglist[cpu];
}
static inline struct ll_pglist_data *ll_pglist_double_lock(
- struct ll_sb_info *sbi,
+ struct ll_sb_info *sbi,
int cpu, struct ll_pglist_data **pd_cpu)
{
int current_cpu = cfs_get_cpu();
unsigned long ras_consecutive_pages;
/*
* number of read requests after the last read-ahead window reset
- * As window is reset on each seek, this is effectively the number
+ * As window is reset on each seek, this is effectively the number
* of consecutive read requests and is used to trigger read-ahead.
*/
unsigned long ras_consecutive_requests;
*/
unsigned long ras_requests;
/*
- * Page index with respect to the current request, these value
+ * Page index with respect to the current request; this value
* will not be accurate when dealing with reads issued via mmap.
*/
unsigned long ras_request_index;
* protected by ->ras_lock.
*/
struct list_head ras_read_beads;
- /*
+ /*
* The following 3 items are used for detecting the stride I/O
- * mode.
- * In stride I/O mode,
- * ...............|-----data-----|****gap*****|--------|******|....
- * offset |-stride_pages-|-stride_gap-|
+ * mode.
+ * In stride I/O mode,
+ * ...............|-----data-----|****gap*****|--------|******|....
+ * offset |-stride_pages-|-stride_gap-|
* ras_stride_offset = offset;
* ras_stride_length = stride_pages + stride_gap;
* ras_stride_pages = stride_pages;
unsigned long ras_stride_length;
unsigned long ras_stride_pages;
pgoff_t ras_stride_offset;
- /*
+ /*
* number of consecutive stride requests; it is similar to
* ras_consecutive_requests, but used for stride I/O mode.
* Note: only more than 2 consecutive stride requests are detected,
#define ll_unregister_cache(cache) do {} while (0)
#endif
-void ll_ra_read_init(struct file *f, struct ll_ra_read *rar,
+void ll_ra_read_init(struct file *f, struct ll_ra_read *rar,
loff_t offset, size_t count);
void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
struct ll_ra_read *ll_ra_read_get(struct file *f);
int ll_file_open(struct inode *inode, struct file *file);
int ll_file_release(struct inode *inode, struct file *file);
int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
-int ll_glimpse_ioctl(struct ll_sb_info *sbi,
+int ll_glimpse_ioctl(struct ll_sb_info *sbi,
struct lov_stripe_md *lsm, lstat_t *st);
int ll_glimpse_size(struct inode *inode, int ast_flags);
int ll_local_open(struct file *file,
struct ptlrpc_request **request);
int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
int set_default);
-int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
+int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
int *lmm_size, struct ptlrpc_request **request);
int ll_fsync(struct file *file, struct dentry *dentry, int data);
int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
* "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
* will bypass interacting with statahead thread for checking:
* "lld_sa_generation == lli_sai->sai_generation"
- */
+ */
if (ldd && lli->lli_sai &&
ldd->lld_sa_generation == lli->lli_sai->sai_generation)
return -EAGAIN;
* Parameters:
* @magic: Dynamic ioctl call routine will feed this value with the pointer
* returned to ll_iocontrol_register. Callback functions should use this
- * data to check the potential collasion of ioctl cmd. If collasion is
+ * data to check for a potential collision of ioctl cmd. If a collision is
* found, callback function should return LLIOC_CONT.
* @rcp: The result of ioctl command.
*
* Return values:
- * If @magic matches the pointer returned by ll_iocontrol_data, the
+ * If @magic matches the pointer returned by ll_iocontrol_register, the
* callback should return LLIOC_STOP; otherwise it should return LLIOC_CONT.
*/
-typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
+typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
struct file *file, unsigned int cmd, unsigned long arg,
void *magic, int *rcp);
-enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg, int *rcp);
/* export functions */
-/* Register ioctl block dynamatically for a regular file.
+/* Register an ioctl block dynamically for a regular file.
*
* @cmd: the array of ioctl commands
* @count: number of commands in @cmd
- * @cb: callback function, it will be called if an ioctl command is found to
+ * @cb: callback function, it will be called if an ioctl command is found to
* belong to the command list @cmd.
*
* Return value:
- * A magic pointer will be returned if success;
- * otherwise, NULL will be returned.
+ * A magic pointer will be returned on success;
+ * otherwise, NULL will be returned.
* */
void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
void ll_iocontrol_unregister(void *magic);
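/*
 * Illustrative usage sketch (not part of this code; "my_cmds", "my_cmd_count",
 * "my_cb" and "my_magic" are hypothetical names): a caller could register a
 * private command set roughly as follows.
 *
 *      static void *my_magic;
 *
 *      static enum llioc_iter my_cb(struct inode *inode, struct file *file,
 *                                   unsigned int cmd, unsigned long arg,
 *                                   void *magic, int *rcp)
 *      {
 *              if (magic != my_magic)
 *                      return LLIOC_CONT;      // not our command set
 *              *rcp = 0;                       // ioctl result
 *              return LLIOC_STOP;              // handled, stop iterating
 *      }
 *
 *      my_magic = ll_iocontrol_register(my_cb, my_cmd_count, my_cmds);
 *      ...
 *      ll_iocontrol_unregister(my_magic);
 */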
OBD_MD_FLATIME | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGENER |
OBD_MD_FLBLOCKS;
if (srvlock) {
- /* set OBD_MD_FLFLAGS in o_valid, only if we
+ /* set OBD_MD_FLFLAGS in o_valid, only if we
* set OBD_FL_TRUNCLOCK, otherwise ost_punch
* and filter_setattr get confused, see the comment
* in ost_punch */
int srvlock = test_bit(LLI_F_SRVLOCK, &lli->lli_flags);
loff_t new_size;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino,
- inode->i_generation, inode, i_size_read(inode), i_size_read(inode));
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",
+ inode->i_ino, inode->i_generation, inode, i_size_read(inode),
+ i_size_read(inode));
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1);
if (lli->lli_size_sem_owner != current) {
struct ost_lvb lvb;
int rc;
- /* XXX I'm pretty sure this is a hack to paper over a more fundamental
- * race condition. */
+ /* XXX I'm pretty sure this is a hack to paper over a more
+ * fundamental race condition. */
lov_stripe_lock(lli->lli_smd);
inode_init_lvb(inode, &lvb);
rc = obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 0);
inode->i_blocks = lvb.lvb_blocks;
if (lvb.lvb_size == i_size_read(inode) && rc == 0) {
- CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
+ CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64
+ ", %Lu=%#Lx\n",
lli->lli_smd->lsm_object_id, i_size_read(inode),
i_size_read(inode));
lov_stripe_unlock(lli->lli_smd);
* with the removepage path which gets the page lock then the
* cli lock */
if(!clear_page_dirty_for_io(page)) {
- unlock_page(page);
- RETURN(-EAGAIN);
- }
+ unlock_page(page);
+ RETURN(-EAGAIN);
+ }
/* This actually clears the dirty bit in the radix tree.*/
set_page_writeback(page);
*
* 1) Further extending writes may have landed in the page cache
* since a partial write first queued this page requiring us
- * to write more from the page cache. (No further races are possible, since
- * by the time this is called, the page is locked.)
+ * to write more from the page cache. (No further races are possible,
+ * since by the time this is called, the page is locked.)
* 2) We might have raced with truncate and want to avoid performing
* write RPCs that are just going to be thrown away by the
* truncate's punch on the storage targets.
* If llaps in the list are being moved they will only move to the end
* of the LRU, and we aren't terribly interested in those pages here (we
* start at the beginning of the list where the least-used llaps are). */
-static inline int llap_shrink_cache_internal(struct ll_sb_info *sbi,
+static inline int llap_shrink_cache_internal(struct ll_sb_info *sbi,
int cpu, int target)
{
struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
ll_pglist_cpu_lock(sbi, cpu);
}
- llap = llite_pglist_next_llap(head,
+ llap = llite_pglist_next_llap(head,
&dummy_llap.llap_pglist_item);
list_del_init(&dummy_llap.llap_pglist_item);
if (llap == NULL)
PageWriteback(page) || (!PageUptodate(page) &&
llap->llap_origin != LLAP_ORIGIN_READAHEAD));
- LL_CDEBUG_PAGE(D_PAGE, page,"%s LRU page: %s%s%s%s%s origin %s\n",
+ LL_CDEBUG_PAGE(D_PAGE, page,
+ "%s LRU page: %s%s%s%s%s origin %s\n",
keep ? "keep" : "drop",
llap->llap_write_queued ? "wq " : "",
PageDirty(page) ? "pd " : "",
ll_truncate_complete_page(page);
++count;
} else {
- LL_CDEBUG_PAGE(D_PAGE, page, "Not dropping page"
- " because it is "
- "%s\n",
- PageDirty(page)?
- "dirty":"mapped");
+ LL_CDEBUG_PAGE(D_PAGE, page,
+ "Not dropping page because it is"
+ " %s\n", PageDirty(page) ?
+ "dirty" : "mapped");
}
}
unlock_page(page);
/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
*
* At first, this code calculates total pages wanted by @shrink_fraction, then
- * it deduces how many pages should be reaped from each cpu in proportion as
+ * it deduces how many pages should be reaped from each cpu in proportion
* to its own page count (llpd_count).
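*
* Illustrative example (numbers are hypothetical): with 1200 cached pages
* in total and @shrink_fraction of 6, about 200 pages are reclaimed, and a
* cpu holding half of the cached pages is asked to drop about half of that
* target.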
*/
int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_budget += surplus;
spin_unlock(&sbi->ll_async_page_reblnc_lock);
- /* TODO: do we really need to call llap_shrink_cache_internal
+ /* TODO: do we really need to call llap_shrink_cache_internal
* for every cpu whose page_count is greater than its budget?
- * for_each_cpu_mask(cpu, mask)
- * ll_shrink_cache_internal(...)
+ * for_each_cpu_mask(cpu, mask)
+ * ll_shrink_cache_internal(...)
*/
return 0;
if (target > 0) {
rc = 0;
atomic_inc(&pd->llpd_sample_count);
- if (atomic_read(&pd->llpd_sample_count) >
+ if (atomic_read(&pd->llpd_sample_count) >
sbi->ll_async_page_sample_max) {
pd->llpd_reblnc_count++;
rc = llap_async_cache_rebalance(sbi);
target = pd->llpd_count - pd->llpd_budget;
}
/* if rc equals 1, it means other cpu is doing the rebalance
- * job, and our budget # would be modified when we read it.
+ * job, and our budget # would be modified when we read it.
* Furthermore, it is most likely being increased because
* we have already reached the rebalance threshold. In this
* case, we skip to shrink cache here. */
static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
-/* WARNING: This algorithm is used to reduce the contention on
- * sbi->ll_lock. It should work well if the ra_max_pages is much
+/* WARNING: This algorithm is used to reduce the contention on
+ * sbi->ll_lock. It should work well if the ra_max_pages is much
* greater than the single file's read-ahead window.
*
- * TODO: There may exist a `global sync problem' in this implementation.
+ * TODO: There may exist a `global sync problem' in this implementation.
* Considering the global ra window is 100M, and each file's ra window is 10M,
- * there are over 10 files trying to get its ra budget and reach
+ * there are over 10 files trying to get their ra budget and reach
* ll_ra_count_get at exactly the same time. All of them will get a zero ra
* window, although the global window is 100M. -jay
*/
static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
- unsigned long ret;
+ unsigned long ret = 0;
ENTRY;
+ /**
+ * If read-ahead pages left are less than 1M, do not do read-ahead,
+ * otherwise it will form small read RPCs (< 1M), which hurt server
+ * performance.
+ */
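+ /* Illustrative example (assuming 4 KiB pages, so PTLRPC_MAX_BRW_PAGES
+  * is 256): if only 100 read-ahead pages are left globally and @len is
+  * 512, ret below is 100 < 256 and gets clamped to 0, so no read-ahead
+  * is issued for this call. */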
ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), len);
- if ((int)ret < 0)
+ if ((int)ret < min((unsigned long)PTLRPC_MAX_BRW_PAGES, len))
GOTO(out, ret = 0);
if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
llap->llap_ra_used = 0;
rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd,
NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0,
- CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE | ASYNC_READY |
- ASYNC_URGENT);
+ CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE |
+ ASYNC_READY | ASYNC_URGENT);
if (rc) {
LL_CDEBUG_PAGE(D_ERROR, page, "read queue failed: rc %d\n", rc);
page_cache_release(page);
#define RAS_CDEBUG(ras) \
CDEBUG(D_READA, \
"lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \
- "csr %lu sf %lu sp %lu sl %lu \n", \
+ "csr %lu sf %lu sp %lu sl %lu \n", \
ras->ras_last_readpage, ras->ras_consecutive_requests, \
ras->ras_consecutive_pages, ras->ras_window_start, \
ras->ras_window_len, ras->ras_next_readahead, \
- ras->ras_requests, ras->ras_request_index, \
+ ras->ras_requests, ras->ras_request_index, \
ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
ras->ras_stride_pages, ras->ras_stride_length)
return &fd->fd_ras;
}
-void ll_ra_read_init(struct file *f, struct ll_ra_read *rar,
+void ll_ra_read_init(struct file *f, struct ll_ra_read *rar,
loff_t offset, size_t count)
{
struct ll_readahead_state *ras;
if (page->mapping != mapping) {
ll_ra_stats_inc(mapping, RA_STAT_WRONG_GRAB_PAGE);
CDEBUG(D_READA, "g_c_p_n returned invalid page\n");
- GOTO(unlock_page, rc = 0);
+ GOTO(unlock_page, rc = 0);
}
/* we do this first so that we can see the page in the /proc
GOTO(unlock_page, rc = -ENOLCK);
}
CDEBUG(D_READA, "read-ahead page\n");
- GOTO(unlock_page, rc = 0);
+ GOTO(unlock_page, rc = 0);
}
/* skip completed pages */
if (Page_Uptodate(page))
- GOTO(unlock_page, rc = 0);
+ GOTO(unlock_page, rc = 0);
/* bail out when we hit the end of the lock. */
rc = ll_issue_page_read(exp, llap, oig, 1);
LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "started read-ahead\n");
rc = 1;
} else {
-unlock_page:
+unlock_page:
unlock_page(page);
LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "skipping read-ahead\n");
}
unsigned long ria_pages;
};
-#define RIA_DEBUG(ria) \
+#define RIA_DEBUG(ria) \
CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
ria->ria_pages)
static int ll_read_ahead_pages(struct obd_export *exp,
struct obd_io_group *oig,
- struct ra_io_arg *ria,
+ struct ra_io_arg *ria,
unsigned long *reserved_pages,
struct address_space *mapping,
unsigned long *ra_end)
if (ras_inside_ra_window(page_idx, ria)) {
/* If the page is inside the read-ahead window*/
rc = ll_read_ahead_page(exp, oig, page_idx, mapping);
- if (rc == 1) {
- (*reserved_pages)--;
- count ++;
- } else if (rc == -ENOLCK)
- break;
+ if (rc == 1) {
+ (*reserved_pages)--;
+ count++;
+ } else if (rc == -ENOLCK)
+ break;
} else if (stride_ria) {
/* If it is not in the read-ahead window, and it is
* read-ahead mode, then check whether it should skip
* the stride gap */
- pgoff_t offset;
+ pgoff_t offset;
/* FIXME: This assertion only is valid when it is for
* forward read-ahead, it will be fixed when backward
* read-ahead is implemented */
" offset %lu \n", page_idx, ria->ria_stoff);
offset = page_idx - ria->ria_stoff;
- offset = offset % (ria->ria_length);
- if (offset > ria->ria_pages) {
- page_idx += ria->ria_length - offset;
+ offset = offset % (ria->ria_length);
+ if (offset > ria->ria_pages) {
+ page_idx += ria->ria_length - offset;
CDEBUG(D_READA, "i %lu skip %lu \n", page_idx,
ria->ria_length - offset);
continue;
/* Enlarge the RA window to encompass the full read */
if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
bead->lrr_start + bead->lrr_count) {
- obd_off read_end = (bead->lrr_start + bead->lrr_count) <<
+ obd_off read_end = (bead->lrr_start + bead->lrr_count) <<
CFS_PAGE_SHIFT;
- obd_extent_calc(exp, lsm, OBD_CALC_STRIPE_RPC_END_ALIGN,
+ obd_extent_calc(exp, lsm, OBD_CALC_STRIPE_RPC_END_ALIGN,
&read_end);
- ras->ras_window_len = ((read_end + 1) >> CFS_PAGE_SHIFT) -
+ ras->ras_window_len = ((read_end + 1) >> CFS_PAGE_SHIFT) -
ras->ras_window_start;
}
- /* Reserve a part of the read-ahead window that we'll be issuing */
+ /* Reserve a part of the read-ahead window that we'll be issuing */
if (ras->ras_window_len) {
start = ras->ras_next_readahead;
end = ras->ras_window_start + ras->ras_window_len - 1;
ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
CDEBUG(D_READA, "reserved page %lu \n", reserved);
-
+
ret = ll_read_ahead_pages(exp, oig, &ria, &reserved, mapping, &ra_end);
LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
if (ra_end < ras->ras_next_readahead &&
index_in_window(ra_end, ras->ras_window_start, 0,
ras->ras_window_len)) {
- ras->ras_next_readahead = ra_end;
- RAS_CDEBUG(ras);
+ ras->ras_next_readahead = ra_end;
+ RAS_CDEBUG(ras);
}
spin_unlock(&ras->ras_lock);
}
INIT_LIST_HEAD(&ras->ras_read_beads);
}
-/*
+/*
* Check whether the read request is in the stride window.
* If it is in the stride window, return 1, otherwise return 0.
*/
struct inode *inode)
{
unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-
+
if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0)
return 0;
/* If it is contiguous read */
if (stride_gap == 0)
return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
-
+
/* Otherwise check the stride by itself */
return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
ras->ras_consecutive_pages == ras->ras_stride_pages;
{
unsigned long stride_gap = index - ras->ras_last_readpage - 1;
- if (!stride_io_mode(ras) && (stride_gap != 0 ||
+ if (!stride_io_mode(ras) && (stride_gap != 0 ||
ras->ras_consecutive_stride_requests == 0)) {
ras->ras_stride_pages = ras->ras_consecutive_pages;
ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;
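/* Illustrative example (values are hypothetical): after reading pages
 * 0..63 and then page 256, stride_gap is 256 - 63 - 1 = 192, so
 * ras_stride_pages becomes 64 and ras_stride_length becomes
 * 192 + 64 = 256. */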
unsigned long stride_len;
LASSERT(ras->ras_stride_length > 0);
- LASSERTF(ras->ras_window_start + ras->ras_window_len
+ LASSERTF(ras->ras_window_start + ras->ras_window_len
>= ras->ras_stride_offset, "window_start %lu, window_len %lu"
" stride_offset %lu\n", ras->ras_window_start,
ras->ras_window_len, ras->ras_stride_offset);
window_len += step * ras->ras_stride_length + left;
- if (stride_page_count(ras, window_len) <= ra->ra_max_pages)
+ if (stride_page_count(ras, window_len) <= ra->ra_max_pages_per_file)
ras->ras_window_len = window_len;
RAS_CDEBUG(ras);
RAS_CDEBUG(ras);
}
-static void ras_increase_window(struct ll_readahead_state *ras,
- struct ll_ra_info *ra, struct inode *inode)
+static void ras_increase_window(struct ll_readahead_state *ras,
+ struct ll_ra_info *ra, struct inode *inode)
{
- __u64 step;
- __u32 size;
- int rc;
-
- step = ((loff_t)(ras->ras_window_start +
- ras->ras_window_len)) << CFS_PAGE_SHIFT;
- size = sizeof(step);
- /*Get rpc_size for this offset (step) */
- rc = obd_get_info(ll_i2obdexp(inode), sizeof(KEY_OFF_RPCSIZE),
- KEY_OFF_RPCSIZE, &size, &step,
- ll_i2info(inode)->lli_smd);
- if (rc)
- step = INIT_RAS_WINDOW_PAGES;
-
- if (stride_io_mode(ras))
- ras_stride_increase_window(ras, ra, (unsigned long)step);
- else
- ras->ras_window_len = min(ras->ras_window_len + (unsigned long)step,
- ra->ra_max_pages);
+ __u64 step;
+ __u32 size;
+ int rc;
+
+ step = ((loff_t)(ras->ras_window_start +
+ ras->ras_window_len)) << CFS_PAGE_SHIFT;
+ size = sizeof(step);
+ /* Get rpc_size for this offset (step) */
+ rc = obd_get_info(ll_i2obdexp(inode), sizeof(KEY_OFF_RPCSIZE),
+ KEY_OFF_RPCSIZE, &size, &step,
+ ll_i2info(inode)->lli_smd);
+ if (rc)
+ step = INIT_RAS_WINDOW_PAGES;
+
+ if (stride_io_mode(ras))
+ ras_stride_increase_window(ras, ra, (unsigned long)step);
+ else
+ ras->ras_window_len = min(ras->ras_window_len +
+ (unsigned long)step,
+ ra->ra_max_pages);
}
static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
index < ras->ras_next_readahead &&
index_in_window(index, ras->ras_window_start, 0,
ras->ras_window_len)) {
- ra_miss = 1;
+ ra_miss = 1;
ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
}
/* On the second access to a file smaller than the tunable
* ra_max_read_ahead_whole_pages trigger RA on all pages in the
- * file up to ra_max_pages. This is simply a best effort and
- * only occurs once per open file. Normal RA behavior is reverted
+ * file up to ra_max_pages_per_file. This is simply a best effort
+ * and only occurs once per open file. Normal RA behavior is reverted
* to for subsequent IO. The mmap case does not increment
* ras_requests and thus can never trigger this behavior. */
if (ras->ras_requests == 2 && !ras->ras_request_index) {
CFS_PAGE_SHIFT;
CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
- ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
+ ra->ra_max_read_ahead_whole_pages,
+ ra->ra_max_pages_per_file);
if (kms_pages &&
kms_pages <= ra->ra_max_read_ahead_whole_pages) {
ras->ras_window_start = 0;
ras->ras_last_readpage = 0;
ras->ras_next_readahead = 0;
- ras->ras_window_len = min(ra->ra_max_pages,
+ ras->ras_window_len = min(ra->ra_max_pages_per_file,
ra->ra_max_read_ahead_whole_pages);
GOTO(out_unlock, 0);
}
}
if (zero) {
- /* check whether it is in stride I/O mode*/
+ /* check whether it is in stride I/O mode */
if (!index_in_stride_window(index, ras, inode)) {
ras_reset(ras, index);
ras->ras_consecutive_pages++;
ras_stride_reset(ras);
GOTO(out_unlock, 0);
} else {
- ras->ras_consecutive_requests = 0;
+ ras->ras_consecutive_requests = 0;
if (++ras->ras_consecutive_stride_requests > 1)
stride_detect = 1;
RAS_CDEBUG(ras);
if (ra_miss) {
if (index_in_stride_window(index, ras, inode) &&
stride_io_mode(ras)) {
- /*If stride-RA hit cache miss, the stride dector
+ /* If stride-RA hit a cache miss, the stride detector
* will not be reset, to avoid the overhead of
* re-detecting the read-ahead mode */
if (index != ras->ras_last_readpage + 1)
ras->ras_consecutive_pages = 0;
RAS_CDEBUG(ras);
} else {
- /* Reset both stride window and normal RA window */
+ /* Reset both stride window and normal RA
+ * window */
ras_reset(ras, index);
ras->ras_consecutive_pages++;
ras_stride_reset(ras);
} else if (stride_io_mode(ras)) {
/* If this is contiguous read but in stride I/O mode
* currently, check whether stride step still is valid,
- * if invalid, it will reset the stride ra window*/
+ * if invalid, it will reset the stride ra window */
if (!index_in_stride_window(index, ras, inode)) {
/* Shrink stride read-ahead window to be zero */
ras_stride_reset(ras);
* uselessly reading and discarding pages for random IO the window is
* only increased once per consecutive request received. */
if ((ras->ras_consecutive_requests > 1 &&
- !ras->ras_request_index) || stride_detect)
- ras_increase_window(ras, ra, inode);
+ !ras->ras_request_index) || stride_detect)
+ ras_increase_window(ras, ra, inode);
EXIT;
out_unlock:
RAS_CDEBUG(ras);
GOTO(out, rc = PTR_ERR(llap));
}
- if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages_per_file)
ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
llap->llap_defer_uptodate);
LL_CDEBUG_PAGE(D_PAGE, page, "queued readpage\n");
/* We have just requested the actual page we want, see if we can tack
* on some readahead to that page's RPC before it is sent. */
- if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages_per_file)
ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
fd->fd_flags);
CERROR("the llap wasn't freed\n");
(*pp)->mapping = NULL;
if (page_count(*pp) != 1)
- CERROR("page %p, flags %#lx, count %i, private %p\n",
- (*pp), (unsigned long)(*pp)->flags, page_count(*pp),
- (void*)page_private(*pp));
+ CERROR("page %p, flags %#lx, count %i, "
+ "private %p\n", (*pp),
+ (unsigned long)(*pp)->flags,
+ page_count(*pp),
+ (void*)page_private(*pp));
__free_pages(*pp, 0);
}
}