Whamcloud - gitweb
LU-12518 llite: proper names/types for offset/pages 48/37248/8
authorAndreas Dilger <adilger@whamcloud.com>
Wed, 15 Jan 2020 10:30:32 +0000 (03:30 -0700)
committerOleg Drokin <green@whamcloud.com>
Tue, 28 Jan 2020 06:02:55 +0000 (06:02 +0000)
Use loff_t for file offsets and pgoff_t for page index values
instead of unsigned long, so that it is possible to distinguish
what type of value is being used in the byte-granular readahead
code.  Otherwise, it is difficult to determine what units the
"start" or "end" values in a given function are expressed in.

Rename variables that reference page index values with an "_idx"
suffix to make this clear when reading the code.  Similarly, use
"bytes" or "pages" for variable names instead of "count" or "len".

Fix stride_page_count() to properly use loff_t for the byte_count,
which might otherwise overflow for large strides.

Cast pgoff_t vars to loff_t before PAGE_SIZE shift to avoid overflow.
Use shift and mask with PAGE_SIZE and PAGE_MASK instead of mod/div.

Use proper 64-bit division functions for the loff_t types when
calculating stride, since they are not guaranteed to be within 4GB.

Remove unused "remainder" argument from ras_align() function.

Fixes: 91d264551508 ("LU-12518 llite: support page unaligned stride readahead")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ie1e18e0766bde2a72311e25536dbb562ce3ebbe5
Reviewed-on: https://review.whamcloud.com/37248
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Gu Zheng <gzheng@ddn.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
13 files changed:
lustre/include/cl_object.h
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/rw.c
lustre/llite/vvp_internal.h
lustre/llite/vvp_io.c
lustre/lov/lov_io.c
lustre/mdc/mdc_dev.c
lustre/mdt/mdt_io.c
lustre/obdclass/integrity.c
lustre/osc/osc_cache.c
lustre/osc/osc_io.c
lustre/osd-zfs/osd_io.c

index f589de4..144e0d0 100644 (file)
@@ -1476,16 +1476,16 @@ struct cl_read_ahead {
        /* Maximum page index the readahead window will end.
         * This is determined by DLM lock coverage, RPC and stripe boundary.
         * cra_end is included. */
-       pgoff_t cra_end;
+       pgoff_t         cra_end_idx;
        /* optimal RPC size for this read, by pages */
-       unsigned long cra_rpc_size;
+       unsigned long   cra_rpc_pages;
        /* Release callback. If readahead holds resources underneath, this
         * function should be called to release it. */
-       void    (*cra_release)(const struct lu_env *env, void *cbdata);
+       void            (*cra_release)(const struct lu_env *env, void *cbdata);
        /* Callback data for cra_release routine */
-       void    *cra_cbdata;
+       void            *cra_cbdata;
        /* whether lock is in contention */
-       bool    cra_contention;
+       bool            cra_contention;
 };
 
 static inline void cl_read_ahead_release(const struct lu_env *env,
index 34c524f..e11b26c 100644 (file)
@@ -466,7 +466,7 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
         * client PAGE_SIZE to be used on that client, if server's PAGE_SIZE is
          * smaller, then the offset may not be aligned and that data is just ignored.
         */
-       if (rnb->rnb_offset % PAGE_SIZE)
+       if (rnb->rnb_offset & ~PAGE_MASK)
                RETURN_EXIT;
 
        /* Server returns whole file or just file tail if it fills in reply
@@ -486,9 +486,9 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
        data = (char *)rnb + sizeof(*rnb);
 
        lnb.lnb_file_offset = rnb->rnb_offset;
-       start = lnb.lnb_file_offset / PAGE_SIZE;
+       start = lnb.lnb_file_offset >> PAGE_SHIFT;
        index = 0;
-       LASSERT(lnb.lnb_file_offset % PAGE_SIZE == 0);
+       LASSERT((lnb.lnb_file_offset & ~PAGE_MASK) == 0);
        lnb.lnb_page_offset = 0;
        do {
                lnb.lnb_data = data + (index << PAGE_SHIFT);
index 0fb485f..76d72c5 100644 (file)
@@ -499,20 +499,20 @@ struct ll_ra_info {
  * counted by page index.
  */
 struct ra_io_arg {
-       pgoff_t ria_start; /* start offset of read-ahead*/
-       pgoff_t ria_end; /* end offset of read-ahead*/
-       unsigned long ria_reserved; /* reserved pages for read-ahead */
-       pgoff_t ria_end_min; /* minimum end to cover current read */
-       bool ria_eof; /* reach end of file */
-       /* If stride read pattern is detected, ria_stoff means where
-        * stride read is started. Note: for normal read-ahead, the
+       pgoff_t         ria_start_idx;  /* start offset of read-ahead*/
+       pgoff_t         ria_end_idx;    /* end offset of read-ahead*/
+       unsigned long   ria_reserved;   /* reserved pages for read-ahead */
+       pgoff_t         ria_end_idx_min;/* minimum end to cover current read */
+       bool            ria_eof;        /* reach end of file */
+       /* If stride read pattern is detected, ria_stoff is the byte offset
+        * where stride read is started. Note: for normal read-ahead, the
         * value here is meaningless, and also it will not be accessed*/
-       unsigned long ria_stoff;
+       loff_t          ria_stoff;
+       /* ria_length and ria_bytes are the stride length and byte count in
+        * the stride I/O mode. They will also be used to check whether
+        * it is stride I/O read-ahead in the read-ahead pages*/
-       unsigned long ria_length;
-       unsigned long ria_bytes;
+       loff_t          ria_length;
+       loff_t          ria_bytes;
 };
 
 /* LL_HIST_MAX=32 causes an overflow */
@@ -716,9 +716,9 @@ struct ll_sb_info {
  * per file-descriptor read-ahead data.
  */
 struct ll_readahead_state {
-       spinlock_t  ras_lock;
+       spinlock_t      ras_lock;
        /* End byte that read(2) try to read.  */
-       unsigned long ras_last_read_end;
+       loff_t          ras_last_read_end_bytes;
         /*
         * number of bytes read after last read-ahead window reset. As window
          * is reset on each seek, this is effectively a number of consecutive
@@ -729,13 +729,13 @@ struct ll_readahead_state {
          * case, it probably doesn't make sense to expand window to
          * PTLRPC_MAX_BRW_PAGES on the third access.
          */
-       unsigned long ras_consecutive_bytes;
+       loff_t          ras_consecutive_bytes;
         /*
          * number of read requests after the last read-ahead window reset
          * As window is reset on each seek, this is effectively the number
          * on consecutive read request and is used to trigger read-ahead.
          */
-       unsigned long ras_consecutive_requests;
+       unsigned long   ras_consecutive_requests;
         /*
          * Parameters of current read-ahead window. Handled by
          * ras_update(). On the initial access to the file or after a seek,
@@ -743,12 +743,13 @@ struct ll_readahead_state {
          * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
          * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
          */
-       pgoff_t ras_window_start, ras_window_len;
+       pgoff_t         ras_window_start_idx;
+       pgoff_t         ras_window_pages;
        /*
-        * Optimal RPC size. It decides how many pages will be sent
-        * for each read-ahead.
+        * Optimal RPC size in pages.
+        * It decides how many pages will be sent for each read-ahead.
         */
-       unsigned long ras_rpc_size;
+       unsigned long   ras_rpc_pages;
         /*
          * Where next read-ahead should start at. This lies within read-ahead
          * window. Read-ahead window is read in pieces rather than at once
@@ -756,13 +757,13 @@ struct ll_readahead_state {
          * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
          * not covered by DLM lock.
          */
-       pgoff_t ras_next_readahead;
+       pgoff_t         ras_next_readahead_idx;
         /*
          * Total number of ll_file_read requests issued, reads originating
          * due to mmap are not counted in this total.  This value is used to
          * trigger full file read-ahead after multiple reads to a small file.
          */
-       unsigned long ras_requests;
+       unsigned long   ras_requests;
         /*
          * The following 3 items are used for detecting the stride I/O
          * mode.
@@ -774,31 +775,29 @@ struct ll_readahead_state {
         * ras_stride_bytes = stride_bytes;
         * Note: all these three items are counted by bytes.
         */
-       unsigned long ras_stride_length;
-       unsigned long ras_stride_bytes;
-       unsigned long ras_stride_offset;
+       loff_t          ras_stride_offset;
+       loff_t          ras_stride_length;
+       loff_t          ras_stride_bytes;
         /*
          * number of consecutive stride requests; it is similar to
          * ras_consecutive_requests, but used for stride I/O mode.
          * Note: only when more than 2 consecutive stride requests are
          * detected will stride read-ahead be enabled
          */
-       unsigned long ras_consecutive_stride_requests;
+       unsigned long   ras_consecutive_stride_requests;
        /* index of the last page that async readahead starts */
-       pgoff_t ras_async_last_readpage;
+       pgoff_t         ras_async_last_readpage_idx;
        /* whether we should increase readahead window */
-       bool ras_need_increase_window;
+       bool            ras_need_increase_window;
        /* whether ra miss check should be skipped */
-       bool ras_no_miss_check;
+       bool            ras_no_miss_check;
 };
 
 struct ll_readahead_work {
        /** File to readahead */
        struct file                     *lrw_file;
-       /** Start bytes */
-       unsigned long                    lrw_start;
-       /** End bytes */
-       unsigned long                    lrw_end;
+       pgoff_t                          lrw_start_idx;
+       pgoff_t                          lrw_end_idx;
 
        /* async worker to handler read */
        struct work_struct               lrw_readahead_work;
@@ -882,7 +881,7 @@ static inline bool ll_sbi_has_file_heat(struct ll_sb_info *sbi)
        return !!(sbi->ll_flags & LL_SBI_FILE_HEAT);
 }
 
-void ll_ras_enter(struct file *f, unsigned long pos, unsigned long count);
+void ll_ras_enter(struct file *f, loff_t pos, size_t count);
 
 /* llite/lcommon_misc.c */
 int cl_ocd_update(struct obd_device *host, struct obd_device *watched,
index f1c7592..61e8baf 100644 (file)
@@ -82,7 +82,8 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
 
 static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
                                     struct ra_io_arg *ria,
-                                    unsigned long pages, unsigned long min)
+                                    unsigned long pages,
+                                    unsigned long pages_min)
 {
         struct ll_ra_info *ra = &sbi->ll_ra_info;
         long ret;
@@ -102,18 +103,18 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
        }
 
 out:
-       if (ret < min) {
+       if (ret < pages_min) {
                /* override ra limit for maximum performance */
-               atomic_add(min - ret, &ra->ra_cur_pages);
-               ret = min;
+               atomic_add(pages_min - ret, &ra->ra_cur_pages);
+               ret = pages_min;
        }
        RETURN(ret);
 }
 
-void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
+void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long pages)
 {
        struct ll_ra_info *ra = &sbi->ll_ra_info;
-       atomic_sub(len, &ra->ra_cur_pages);
+       atomic_sub(pages, &ra->ra_cur_pages);
 }
 
 static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
@@ -129,26 +130,27 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
 }
 
 #define RAS_CDEBUG(ras) \
-       CDEBUG(D_READA,                                                      \
-              "lre %lu cr %lu cb %lu ws %lu wl %lu nra %lu rpc %lu "        \
-              "r %lu csr %lu sf %lu sb %lu sl %lu lr %lu\n",                \
-              ras->ras_last_read_end, ras->ras_consecutive_requests,        \
-              ras->ras_consecutive_bytes, ras->ras_window_start,            \
-              ras->ras_window_len, ras->ras_next_readahead,                 \
-              ras->ras_rpc_size, ras->ras_requests,                         \
+       CDEBUG(D_READA,                                                      \
+              "lre %llu cr %lu cb %llu wsi %lu wp %lu nra %lu rpc %lu "     \
+              "r %lu csr %lu so %llu sb %llu sl %llu lr %lu\n",             \
+              ras->ras_last_read_end_bytes, ras->ras_consecutive_requests,  \
+              ras->ras_consecutive_bytes, ras->ras_window_start_idx,        \
+              ras->ras_window_pages, ras->ras_next_readahead_idx,           \
+              ras->ras_rpc_pages, ras->ras_requests,                        \
               ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
               ras->ras_stride_bytes, ras->ras_stride_length,                \
-              ras->ras_async_last_readpage)
+              ras->ras_async_last_readpage_idx)
 
-static int pos_in_window(unsigned long pos, unsigned long point,
-                        unsigned long before, unsigned long after)
+static bool pos_in_window(loff_t pos, loff_t point,
+                         unsigned long before, unsigned long after)
 {
-        unsigned long start = point - before, end = point + after;
+       loff_t start = point - before;
+       loff_t end = point + after;
 
-        if (start > point)
-               start = 0;
-        if (end < point)
-               end = ~0;
+       if (start > point)
+               start = 0;
+       if (end < point)
+               end = ~0;
 
        return start <= pos && pos <= end;
 }
@@ -228,9 +230,9 @@ out:
        RETURN(rc);
 }
 
-#define RIA_DEBUG(ria)                                         \
-       CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rb %lu\n", \
-              ria->ria_start, ria->ria_end, ria->ria_stoff,    \
+#define RIA_DEBUG(ria)                                                 \
+       CDEBUG(D_READA, "rs %lu re %lu ro %llu rl %llu rb %llu\n",      \
+              ria->ria_start_idx, ria->ria_end_idx, ria->ria_stoff,    \
               ria->ria_length, ria->ria_bytes)
 
 static inline int stride_io_mode(struct ll_readahead_state *ras)
@@ -238,7 +240,7 @@ static inline int stride_io_mode(struct ll_readahead_state *ras)
         return ras->ras_consecutive_stride_requests > 1;
 }
 
-/* The function calculates how much pages will be read in
+/* The function calculates how many bytes will be read in
  * [off, off + length], in such stride IO area,
  * stride_offset = st_off, stride_lengh = st_len,
  * stride_bytes = st_bytes
@@ -256,31 +258,29 @@ static inline int stride_io_mode(struct ll_readahead_state *ras)
  *          =   |<----->|  +  |-------------------------------------| +   |---|
  *             start_left                 st_bytes * i                 end_left
  */
-static unsigned long
-stride_byte_count(unsigned long st_off, unsigned long st_len,
-                 unsigned long st_bytes, unsigned long off,
-                 unsigned long length)
+static loff_t stride_byte_count(loff_t st_off, loff_t st_len, loff_t st_bytes,
+                               loff_t off, loff_t length)
 {
-        __u64 start = off > st_off ? off - st_off : 0;
-        __u64 end = off + length > st_off ? off + length - st_off : 0;
-        unsigned long start_left = 0;
-        unsigned long end_left = 0;
-       unsigned long bytes_count;
+       u64 start = off > st_off ? off - st_off : 0;
+       u64 end = off + length > st_off ? off + length - st_off : 0;
+       u64 start_left;
+       u64 end_left;
+       u64 bytes_count;
 
-        if (st_len == 0 || length == 0 || end == 0)
-                return length;
+       if (st_len == 0 || length == 0 || end == 0)
+               return length;
 
-       start_left = do_div(start, st_len);
+       start = div64_u64_rem(start, st_len, &start_left);
        if (start_left < st_bytes)
                start_left = st_bytes - start_left;
        else
                start_left = 0;
 
-       end_left = do_div(end, st_len);
+       end = div64_u64_rem(end, st_len, &end_left);
        if (end_left > st_bytes)
                end_left = st_bytes;
 
-       CDEBUG(D_READA, "start %llu, end %llu start_left %lu end_left %lu\n",
+       CDEBUG(D_READA, "start %llu, end %llu start_left %llu end_left %llu\n",
               start, end, start_left, end_left);
 
        if (start == end)
@@ -290,48 +290,45 @@ stride_byte_count(unsigned long st_off, unsigned long st_len,
                        st_bytes * (end - start - 1) + end_left;
 
        CDEBUG(D_READA,
-              "st_off %lu, st_len %lu st_bytes %lu off %lu length %lu bytescount %lu\n",
+              "st_off %llu, st_len %llu st_bytes %llu off %llu length %llu bytescount %llu\n",
               st_off, st_len, st_bytes, off, length, bytes_count);
 
        return bytes_count;
 }
 
-static int ria_page_count(struct ra_io_arg *ria)
+static unsigned long ria_page_count(struct ra_io_arg *ria)
 {
-       u64 length_bytes = ria->ria_end >= ria->ria_start ?
-               (ria->ria_end - ria->ria_start + 1) << PAGE_SHIFT : 0;
-       unsigned int bytes_count, pg_count;
+       loff_t length_bytes = ria->ria_end_idx >= ria->ria_start_idx ?
+               (loff_t)(ria->ria_end_idx -
+                        ria->ria_start_idx + 1) << PAGE_SHIFT : 0;
+       loff_t bytes_count;
 
        if (ria->ria_length > ria->ria_bytes && ria->ria_bytes &&
-           (ria->ria_length % PAGE_SIZE || ria->ria_bytes % PAGE_SIZE ||
-             ria->ria_stoff % PAGE_SIZE)) {
+           (ria->ria_length & ~PAGE_MASK || ria->ria_bytes & ~PAGE_MASK ||
+            ria->ria_stoff & ~PAGE_MASK)) {
                /* Over-estimate un-aligned page stride read */
-               pg_count = ((ria->ria_bytes + PAGE_SIZE - 1) >>
-                           PAGE_SHIFT) + 1;
+               unsigned long pg_count = ((ria->ria_bytes +
+                                          PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
                pg_count *= length_bytes / ria->ria_length + 1;
 
                return pg_count;
        }
        bytes_count = stride_byte_count(ria->ria_stoff, ria->ria_length,
-                                        ria->ria_bytes, ria->ria_start,
-                                        length_bytes);
+                                       ria->ria_bytes,
+                                       (loff_t)ria->ria_start_idx<<PAGE_SHIFT,
+                                       length_bytes);
        return (bytes_count + PAGE_SIZE - 1) >> PAGE_SHIFT;
 }
 
-static unsigned long ras_align(struct ll_readahead_state *ras,
-                              pgoff_t index, unsigned long *remainder)
+static pgoff_t ras_align(struct ll_readahead_state *ras, pgoff_t index)
 {
-       unsigned long rem = index % ras->ras_rpc_size;
-       if (remainder != NULL)
-               *remainder = rem;
-       return index - rem;
+       return index - (index % ras->ras_rpc_pages);
 }
 
-/*Check whether the index is in the defined ra-window */
-static bool ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
+/* Check whether the index is in the defined ra-window */
+static bool ras_inside_ra_window(pgoff_t idx, struct ra_io_arg *ria)
 {
-       unsigned long offset;
-       unsigned long pos = idx << PAGE_SHIFT;
+       loff_t pos = (loff_t)idx << PAGE_SHIFT;
 
        /* If ria_length == ria_bytes, it means non-stride I/O mode,
          * idx should always inside read-ahead window in this case
@@ -342,12 +339,16 @@ static bool ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
                return true;
 
        if (pos >= ria->ria_stoff) {
-               offset = (pos - ria->ria_stoff) % ria->ria_length;
+               u64 offset;
+
+               div64_u64_rem(pos - ria->ria_stoff, ria->ria_length, &offset);
+
                if (offset < ria->ria_bytes ||
                    (ria->ria_length - offset) < PAGE_SIZE)
                        return true;
-       } else if (pos + PAGE_SIZE > ria->ria_stoff)
+       } else if (pos + PAGE_SIZE > ria->ria_stoff) {
                return true;
+       }
 
        return false;
 }
@@ -364,11 +365,12 @@ ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
        LASSERT(ria != NULL);
        RIA_DEBUG(ria);
 
-       for (page_idx = ria->ria_start;
-            page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) {
+       for (page_idx = ria->ria_start_idx;
+            page_idx <= ria->ria_end_idx && ria->ria_reserved > 0;
+            page_idx++) {
                if (ras_inside_ra_window(page_idx, ria)) {
-                       if (ra.cra_end == 0 || ra.cra_end < page_idx) {
-                               unsigned long end;
+                       if (ra.cra_end_idx == 0 || ra.cra_end_idx < page_idx) {
+                               pgoff_t end_idx;
 
                                cl_read_ahead_release(env, &ra);
 
@@ -376,35 +378,36 @@ ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
                                if (rc < 0)
                                        break;
 
-                               /* Do not shrink the ria_end at any case until
+                               /* Do not shrink ria_end_idx at any case until
                                 * the minimum end of current read is covered.
-                                * And only shrink the ria_end if the matched
+                                * And only shrink ria_end_idx if the matched
                                 * LDLM lock doesn't cover more. */
-                               if (page_idx > ra.cra_end ||
+                               if (page_idx > ra.cra_end_idx ||
                                    (ra.cra_contention &&
-                                    page_idx > ria->ria_end_min)) {
-                                       ria->ria_end = ra.cra_end;
+                                    page_idx > ria->ria_end_idx_min)) {
+                                       ria->ria_end_idx = ra.cra_end_idx;
                                        break;
                                }
 
                                CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n",
-                                      page_idx, ra.cra_end, ra.cra_rpc_size);
-                               LASSERTF(ra.cra_end >= page_idx,
+                                      page_idx, ra.cra_end_idx,
+                                      ra.cra_rpc_pages);
+                               LASSERTF(ra.cra_end_idx >= page_idx,
                                         "object: %p, indcies %lu / %lu\n",
-                                        io->ci_obj, ra.cra_end, page_idx);
+                                        io->ci_obj, ra.cra_end_idx, page_idx);
                                /* update read ahead RPC size.
                                 * NB: it's racy but doesn't matter */
-                               if (ras->ras_rpc_size != ra.cra_rpc_size &&
-                                   ra.cra_rpc_size > 0)
-                                       ras->ras_rpc_size = ra.cra_rpc_size;
+                               if (ras->ras_rpc_pages != ra.cra_rpc_pages &&
+                                   ra.cra_rpc_pages > 0)
+                                       ras->ras_rpc_pages = ra.cra_rpc_pages;
                                /* trim it to align with optimal RPC size */
-                               end = ras_align(ras, ria->ria_end + 1, NULL);
-                               if (end > 0 && !ria->ria_eof)
-                                       ria->ria_end = end - 1;
-                               if (ria->ria_end < ria->ria_end_min)
-                                       ria->ria_end = ria->ria_end_min;
+                               end_idx = ras_align(ras, ria->ria_end_idx + 1);
+                               if (end_idx > 0 && !ria->ria_eof)
+                                       ria->ria_end_idx = end_idx - 1;
+                               if (ria->ria_end_idx < ria->ria_end_idx_min)
+                                       ria->ria_end_idx = ria->ria_end_idx_min;
                        }
-                       if (page_idx > ria->ria_end)
+                       if (page_idx > ria->ria_end_idx)
                                break;
 
                        /* If the page is inside the read-ahead window */
@@ -424,16 +427,17 @@ ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
                         * read-ahead mode, then check whether it should skip
                         * the stride gap.
                         */
-                       unsigned long offset;
-                       unsigned long pos = page_idx << PAGE_SHIFT;
+                       loff_t pos = (loff_t)page_idx << PAGE_SHIFT;
+                       u64 offset;
 
-                       offset = (pos - ria->ria_stoff) % ria->ria_length;
+                       div64_u64_rem(pos - ria->ria_stoff, ria->ria_length,
+                                     &offset);
                        if (offset >= ria->ria_bytes) {
                                pos += (ria->ria_length - offset);
                                if ((pos >> PAGE_SHIFT) >= page_idx + 1)
                                        page_idx = (pos >> PAGE_SHIFT) - 1;
                                CDEBUG(D_READA,
-                                      "Stride: jump %lu pages to %lu\n",
+                                      "Stride: jump %llu pages to %lu\n",
                                       ria->ria_length - offset, page_idx);
                                continue;
                        }
@@ -493,12 +497,12 @@ static void ll_readahead_handle_work(struct work_struct *wq)
        struct ll_readahead_state *ras;
        struct cl_io *io;
        struct cl_2queue *queue;
-       pgoff_t ra_end = 0;
-       unsigned long len, mlen = 0;
+       pgoff_t ra_end_idx = 0;
+       unsigned long pages, pages_min = 0;
        struct file *file;
        __u64 kms;
        int rc;
-       unsigned long end_index;
+       pgoff_t eof_index;
 
        work = container_of(wq, struct ll_readahead_work,
                            lrw_readahead_work);
@@ -526,28 +530,28 @@ static void ll_readahead_handle_work(struct work_struct *wq)
        ria = &ll_env_info(env)->lti_ria;
        memset(ria, 0, sizeof(*ria));
 
-       ria->ria_start = work->lrw_start;
+       ria->ria_start_idx = work->lrw_start_idx;
        /* Truncate RA window to end of file */
-       end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT);
-       if (end_index <= work->lrw_end) {
-               work->lrw_end = end_index;
+       eof_index = (pgoff_t)(kms - 1) >> PAGE_SHIFT;
+       if (eof_index <= work->lrw_end_idx) {
+               work->lrw_end_idx = eof_index;
                ria->ria_eof = true;
        }
-       if (work->lrw_end <= work->lrw_start)
+       if (work->lrw_end_idx <= work->lrw_start_idx)
                GOTO(out_put_env, rc = 0);
 
-       ria->ria_end = work->lrw_end;
-       len = ria->ria_end - ria->ria_start + 1;
+       ria->ria_end_idx = work->lrw_end_idx;
+       pages = ria->ria_end_idx - ria->ria_start_idx + 1;
        ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria,
-                                           ria_page_count(ria), mlen);
+                                           ria_page_count(ria), pages_min);
 
        CDEBUG(D_READA,
               "async reserved pages: %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
-              ria->ria_reserved, len, mlen,
+              ria->ria_reserved, pages, pages_min,
               atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
               ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
 
-       if (ria->ria_reserved < len) {
+       if (ria->ria_reserved < pages) {
                ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
                if (PAGES_TO_MiB(ria->ria_reserved) < 1) {
                        ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
@@ -555,7 +559,7 @@ static void ll_readahead_handle_work(struct work_struct *wq)
                }
        }
 
-       rc = cl_io_rw_init(env, io, CIT_READ, ria->ria_start, len);
+       rc = cl_io_rw_init(env, io, CIT_READ, ria->ria_start_idx, pages);
        if (rc)
                GOTO(out_put_env, rc);
 
@@ -570,7 +574,8 @@ static void ll_readahead_handle_work(struct work_struct *wq)
        queue = &io->ci_queue;
        cl_2queue_init(queue);
 
-       rc = ll_read_ahead_pages(env, io, &queue->c2_qin, ras, ria, &ra_end);
+       rc = ll_read_ahead_pages(env, io, &queue->c2_qin, ras, ria,
+                                &ra_end_idx);
        if (ria->ria_reserved != 0)
                ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
        if (queue->c2_qin.pl_nr > 0) {
@@ -580,10 +585,10 @@ static void ll_readahead_handle_work(struct work_struct *wq)
                if (rc == 0)
                        task_io_account_read(PAGE_SIZE * count);
        }
-       if (ria->ria_end == ra_end && ra_end == (kms >> PAGE_SHIFT))
+       if (ria->ria_end_idx == ra_end_idx && ra_end_idx == (kms >> PAGE_SHIFT))
                ll_ra_stats_inc(inode, RA_STAT_EOF);
 
-       if (ra_end != ria->ria_end)
+       if (ra_end_idx != ria->ria_end_idx)
                ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
 
        /* TODO: discard all pages until page reinit route is implemented */
@@ -599,7 +604,7 @@ out_io_fini:
 out_put_env:
        cl_env_put(env, &refcheck);
 out_free_work:
-       if (ra_end > 0)
+       if (ra_end_idx > 0)
                ll_ra_stats_inc_sbi(ll_i2sbi(inode), RA_STAT_ASYNC);
        ll_readahead_work_free(work);
 }
@@ -611,8 +616,8 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
 {
        struct vvp_io *vio = vvp_env_io(env);
        struct ll_thread_info *lti = ll_env_info(env);
-       unsigned long len, mlen = 0;
-       pgoff_t ra_end = 0, start = 0, end = 0;
+       unsigned long pages, pages_min = 0;
+       pgoff_t ra_end_idx = 0, start_idx = 0, end_idx = 0;
        struct inode *inode;
        struct ra_io_arg *ria = &lti->lti_ria;
        struct cl_object *clob;
@@ -623,7 +628,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
        clob = io->ci_obj;
        inode = vvp_object_inode(clob);
 
-       memset(ria, 0, sizeof *ria);
+       memset(ria, 0, sizeof(*ria));
        ret = ll_readahead_file_kms(env, io, &kms);
        if (ret != 0)
                RETURN(ret);
@@ -636,52 +641,52 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
        spin_lock(&ras->ras_lock);
 
        /**
-        * Note: other thread might rollback the ras_next_readahead,
+        * Note: other thread might rollback the ras_next_readahead_idx,
         * if it can not get the full size of prepared pages, see the
         * end of this function. For stride read ahead, it needs to
         * make sure the offset is no less than ras_stride_offset,
         * so that stride read ahead can work correctly.
         */
        if (stride_io_mode(ras))
-               start = max(ras->ras_next_readahead,
-                           ras->ras_stride_offset >> PAGE_SHIFT);
+               start_idx = max_t(pgoff_t, ras->ras_next_readahead_idx,
+                                 ras->ras_stride_offset >> PAGE_SHIFT);
        else
-               start = ras->ras_next_readahead;
+               start_idx = ras->ras_next_readahead_idx;
 
-       if (ras->ras_window_len > 0)
-               end = ras->ras_window_start + ras->ras_window_len - 1;
+       if (ras->ras_window_pages > 0)
+               end_idx = ras->ras_window_start_idx + ras->ras_window_pages - 1;
 
        /* Enlarge the RA window to encompass the full read */
        if (vio->vui_ra_valid &&
-           end < vio->vui_ra_start + vio->vui_ra_count - 1)
-               end = vio->vui_ra_start + vio->vui_ra_count - 1;
+           end_idx < vio->vui_ra_start_idx + vio->vui_ra_pages - 1)
+               end_idx = vio->vui_ra_start_idx + vio->vui_ra_pages - 1;
 
-        if (end != 0) {
-               unsigned long end_index;
+       if (end_idx != 0) {
+               pgoff_t eof_index;
 
                /* Truncate RA window to end of file */
-               end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT);
-               if (end_index <= end) {
-                       end = end_index;
+               eof_index = (pgoff_t)((kms - 1) >> PAGE_SHIFT);
+               if (eof_index <= end_idx) {
+                       end_idx = eof_index;
                        ria->ria_eof = true;
                }
-        }
-        ria->ria_start = start;
-        ria->ria_end = end;
-        /* If stride I/O mode is detected, get stride window*/
-        if (stride_io_mode(ras)) {
-                ria->ria_stoff = ras->ras_stride_offset;
-                ria->ria_length = ras->ras_stride_length;
+       }
+       ria->ria_start_idx = start_idx;
+       ria->ria_end_idx = end_idx;
+       /* If stride I/O mode is detected, get stride window*/
+       if (stride_io_mode(ras)) {
+               ria->ria_stoff = ras->ras_stride_offset;
+               ria->ria_length = ras->ras_stride_length;
                ria->ria_bytes = ras->ras_stride_bytes;
-        }
+       }
        spin_unlock(&ras->ras_lock);
 
-       if (end == 0) {
+       if (end_idx == 0) {
                ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
                RETURN(0);
        }
-       len = ria_page_count(ria);
-       if (len == 0) {
+       pages = ria_page_count(ria);
+       if (pages == 0) {
                ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
                RETURN(0);
        }
@@ -689,43 +694,46 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
        RAS_CDEBUG(ras);
        CDEBUG(D_READA, DFID": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
               PFID(lu_object_fid(&clob->co_lu)),
-              ria->ria_start, ria->ria_end,
-              vio->vui_ra_valid ? vio->vui_ra_start : 0,
-              vio->vui_ra_valid ? vio->vui_ra_count : 0,
+              ria->ria_start_idx, ria->ria_end_idx,
+              vio->vui_ra_valid ? vio->vui_ra_start_idx : 0,
+              vio->vui_ra_valid ? vio->vui_ra_pages : 0,
               hit);
 
        /* at least to extend the readahead window to cover current read */
        if (!hit && vio->vui_ra_valid &&
-           vio->vui_ra_start + vio->vui_ra_count > ria->ria_start)
-               ria->ria_end_min = vio->vui_ra_start + vio->vui_ra_count - 1;
+           vio->vui_ra_start_idx + vio->vui_ra_pages > ria->ria_start_idx)
+               ria->ria_end_idx_min =
+                       vio->vui_ra_start_idx + vio->vui_ra_pages - 1;
 
-       ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
-       if (ria->ria_reserved < len)
+       ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, pages,
+                                           pages_min);
+       if (ria->ria_reserved < pages)
                ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
 
        CDEBUG(D_READA, "reserved pages: %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
-              ria->ria_reserved, len, mlen,
+              ria->ria_reserved, pages, pages_min,
               atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
               ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
 
-       ret = ll_read_ahead_pages(env, io, queue, ras, ria, &ra_end);
+       ret = ll_read_ahead_pages(env, io, queue, ras, ria, &ra_end_idx);
 
        if (ria->ria_reserved != 0)
                ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
 
-       if (ra_end == end && ra_end == (kms >> PAGE_SHIFT))
+       if (ra_end_idx == end_idx && ra_end_idx == (kms >> PAGE_SHIFT))
                ll_ra_stats_inc(inode, RA_STAT_EOF);
 
-       CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
-              ra_end, end, ria->ria_end, ret);
+       CDEBUG(D_READA,
+              "ra_end_idx = %lu end_idx = %lu stride end = %lu pages = %d\n",
+              ra_end_idx, end_idx, ria->ria_end_idx, ret);
 
-       if (ra_end != end)
+       if (ra_end_idx != end_idx)
                ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
-       if (ra_end > 0) {
+       if (ra_end_idx > 0) {
                /* update the ras so that the next read-ahead tries from
                 * where we left off. */
                spin_lock(&ras->ras_lock);
-               ras->ras_next_readahead = ra_end + 1;
+               ras->ras_next_readahead_idx = ra_end_idx + 1;
                spin_unlock(&ras->ras_lock);
                RAS_CDEBUG(ras);
        }
@@ -735,7 +743,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
 
 static void ras_set_start(struct ll_readahead_state *ras, pgoff_t index)
 {
-       ras->ras_window_start = ras_align(ras, index, NULL);
+       ras->ras_window_start_idx = ras_align(ras, index);
 }
 
 /* called with the ras_lock held or from places where it doesn't matter */
@@ -743,9 +751,9 @@ static void ras_reset(struct ll_readahead_state *ras, pgoff_t index)
 {
        ras->ras_consecutive_requests = 0;
        ras->ras_consecutive_bytes = 0;
-       ras->ras_window_len = 0;
+       ras->ras_window_pages = 0;
        ras_set_start(ras, index);
-       ras->ras_next_readahead = max(ras->ras_window_start, index + 1);
+       ras->ras_next_readahead_idx = max(ras->ras_window_start_idx, index + 1);
 
        RAS_CDEBUG(ras);
 }
@@ -762,9 +770,9 @@ static void ras_stride_reset(struct ll_readahead_state *ras)
 void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
 {
        spin_lock_init(&ras->ras_lock);
-       ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES;
+       ras->ras_rpc_pages = PTLRPC_MAX_BRW_PAGES;
        ras_reset(ras, 0);
-       ras->ras_last_read_end = 0;
+       ras->ras_last_read_end_bytes = 0;
        ras->ras_requests = 0;
 }
 
@@ -773,15 +781,15 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
  * If it is in the stride window, return true, otherwise return false.
  */
 static bool read_in_stride_window(struct ll_readahead_state *ras,
-                                 unsigned long pos, unsigned long count)
+                                 loff_t pos, loff_t count)
 {
-       unsigned long stride_gap;
+       loff_t stride_gap;
 
        if (ras->ras_stride_length == 0 || ras->ras_stride_bytes == 0 ||
            ras->ras_stride_bytes == ras->ras_stride_length)
                return false;
 
-       stride_gap = pos - ras->ras_last_read_end - 1;
+       stride_gap = pos - ras->ras_last_read_end_bytes - 1;
 
        /* If it is contiguous read */
        if (stride_gap == 0)
@@ -795,13 +803,13 @@ static bool read_in_stride_window(struct ll_readahead_state *ras,
 }
 
 static void ras_init_stride_detector(struct ll_readahead_state *ras,
-                                    unsigned long pos, unsigned long count)
+                                    loff_t pos, loff_t count)
 {
-       unsigned long stride_gap = pos - ras->ras_last_read_end - 1;
+       loff_t stride_gap = pos - ras->ras_last_read_end_bytes - 1;
 
         LASSERT(ras->ras_consecutive_stride_requests == 0);
 
-       if (pos <= ras->ras_last_read_end) {
+       if (pos <= ras->ras_last_read_end_bytes) {
                 /*Reset stride window for forward read*/
                 ras_stride_reset(ras);
                 return;
@@ -816,12 +824,12 @@ static void ras_init_stride_detector(struct ll_readahead_state *ras,
 }
 
 static unsigned long
-stride_page_count(struct ll_readahead_state *ras, unsigned long len)
+stride_page_count(struct ll_readahead_state *ras, loff_t len)
 {
-       unsigned int bytes_count =
+       loff_t bytes_count =
                stride_byte_count(ras->ras_stride_offset,
                                  ras->ras_stride_length, ras->ras_stride_bytes,
-                                  ras->ras_stride_offset, len);
+                                 ras->ras_stride_offset, len);
 
        return (bytes_count + PAGE_SIZE - 1) >> PAGE_SHIFT;
 }
@@ -829,44 +837,47 @@ stride_page_count(struct ll_readahead_state *ras, unsigned long len)
 /* Stride Read-ahead window will be increased inc_len according to
  * stride I/O pattern */
 static void ras_stride_increase_window(struct ll_readahead_state *ras,
-                                       struct ll_ra_info *ra,
-                                       unsigned long inc_len)
+                                      struct ll_ra_info *ra, loff_t inc_bytes)
 {
-        unsigned long left, step, window_len;
-        unsigned long stride_len;
-       unsigned long end = ras->ras_window_start + ras->ras_window_len;
+       loff_t window_bytes, stride_bytes;
+       u64 left_bytes;
+       u64 step;
+       loff_t end;
 
-        LASSERT(ras->ras_stride_length > 0);
+       /* temporarily store in page units to reduce LASSERT() cost below */
+       end = ras->ras_window_start_idx + ras->ras_window_pages;
+
+       LASSERT(ras->ras_stride_length > 0);
        LASSERTF(end >= (ras->ras_stride_offset >> PAGE_SHIFT),
-                "window_start %lu, window_len %lu stride_offset %lu\n",
-                ras->ras_window_start, ras->ras_window_len,
+                "window_start_idx %lu, window_pages %lu stride_offset %llu\n",
+                ras->ras_window_start_idx, ras->ras_window_pages,
                 ras->ras_stride_offset);
 
        end <<= PAGE_SHIFT;
-       if (end < ras->ras_stride_offset)
-               stride_len = 0;
+       if (end <= ras->ras_stride_offset)
+               stride_bytes = 0;
        else
-               stride_len = end - ras->ras_stride_offset;
+               stride_bytes = end - ras->ras_stride_offset;
 
-       left = stride_len % ras->ras_stride_length;
-       window_len = (ras->ras_window_len << PAGE_SHIFT) - left;
+       div64_u64_rem(stride_bytes, ras->ras_stride_length, &left_bytes);
+       window_bytes = ((loff_t)ras->ras_window_pages << PAGE_SHIFT) -
+               left_bytes;
 
-       if (left < ras->ras_stride_bytes)
-                left += inc_len;
+       if (left_bytes < ras->ras_stride_bytes)
+               left_bytes += inc_bytes;
        else
-               left = ras->ras_stride_bytes + inc_len;
+               left_bytes = ras->ras_stride_bytes + inc_bytes;
 
        LASSERT(ras->ras_stride_bytes != 0);
 
-       step = left / ras->ras_stride_bytes;
-       left %= ras->ras_stride_bytes;
+       step = div64_u64_rem(left_bytes, ras->ras_stride_bytes, &left_bytes);
 
-       window_len += step * ras->ras_stride_length + left;
+       window_bytes += step * ras->ras_stride_length + left_bytes;
 
-       if (stride_page_count(ras, window_len) <= ra->ra_max_pages_per_file)
-               ras->ras_window_len = (window_len >> PAGE_SHIFT);
+       if (stride_page_count(ras, window_bytes) <= ra->ra_max_pages_per_file)
+               ras->ras_window_pages = (window_bytes >> PAGE_SHIFT);
 
-        RAS_CDEBUG(ras);
+       RAS_CDEBUG(ras);
 }
 
 static void ras_increase_window(struct inode *inode,
@@ -879,36 +890,34 @@ static void ras_increase_window(struct inode *inode,
         */
        if (stride_io_mode(ras)) {
                ras_stride_increase_window(ras, ra,
-                               ras->ras_rpc_size << PAGE_SHIFT);
+                                     (loff_t)ras->ras_rpc_pages << PAGE_SHIFT);
        } else {
-               unsigned long wlen;
+               pgoff_t window_pages;
 
-               wlen = min(ras->ras_window_len + ras->ras_rpc_size,
-                          ra->ra_max_pages_per_file);
-               if (wlen < ras->ras_rpc_size)
-                       ras->ras_window_len = wlen;
+               window_pages = min(ras->ras_window_pages + ras->ras_rpc_pages,
+                                  ra->ra_max_pages_per_file);
+               if (window_pages < ras->ras_rpc_pages)
+                       ras->ras_window_pages = window_pages;
                else
-                       ras->ras_window_len = ras_align(ras, wlen, NULL);
+                       ras->ras_window_pages = ras_align(ras, window_pages);
        }
 }
 
 /**
  * Seek within 8 pages are considered as sequential read for now.
  */
-static inline bool is_loose_seq_read(struct ll_readahead_state *ras,
-                                    unsigned long pos)
+static inline bool is_loose_seq_read(struct ll_readahead_state *ras, loff_t pos)
 {
-       return pos_in_window(pos, ras->ras_last_read_end,
-                            8 << PAGE_SHIFT, 8 << PAGE_SHIFT);
+       return pos_in_window(pos, ras->ras_last_read_end_bytes,
+                            8UL << PAGE_SHIFT, 8UL << PAGE_SHIFT);
 }
 
 static void ras_detect_read_pattern(struct ll_readahead_state *ras,
                                    struct ll_sb_info *sbi,
-                                   unsigned long pos, unsigned long count,
-                                   bool mmap)
+                                   loff_t pos, size_t count, bool mmap)
 {
        bool stride_detect = false;
-       unsigned long index = pos >> PAGE_SHIFT;
+       pgoff_t index = pos >> PAGE_SHIFT;
 
        /*
         * Reset the read-ahead window in two cases. First when the app seeks
@@ -943,25 +952,25 @@ static void ras_detect_read_pattern(struct ll_readahead_state *ras,
                 */
                if (!read_in_stride_window(ras, pos, count)) {
                        ras_stride_reset(ras);
-                       ras->ras_window_len = 0;
-                       ras->ras_next_readahead = index;
+                       ras->ras_window_pages = 0;
+                       ras->ras_next_readahead_idx = index;
                }
        }
 
        ras->ras_consecutive_bytes += count;
        if (mmap) {
-               unsigned int idx = (ras->ras_consecutive_bytes >> PAGE_SHIFT);
+               pgoff_t idx = ras->ras_consecutive_bytes >> PAGE_SHIFT;
 
-               if ((idx >= 4 && idx % 4 == 0) || stride_detect)
+               if ((idx >= 4 && (idx & 3UL) == 0) || stride_detect)
                        ras->ras_need_increase_window = true;
        } else if ((ras->ras_consecutive_requests > 1 || stride_detect)) {
                ras->ras_need_increase_window = true;
        }
 
-       ras->ras_last_read_end = pos + count - 1;
+       ras->ras_last_read_end_bytes = pos + count - 1;
 }
 
-void ll_ras_enter(struct file *f, unsigned long pos, unsigned long count)
+void ll_ras_enter(struct file *f, loff_t pos, size_t count)
 {
        struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
        struct ll_readahead_state *ras = &fd->fd_ras;
@@ -976,8 +985,8 @@ void ll_ras_enter(struct file *f, unsigned long pos, unsigned long count)
        ras->ras_no_miss_check = false;
        /*
         * On the second access to a file smaller than the tunable
-         * ra_max_read_ahead_whole_pages trigger RA on all pages in the
-         * file up to ra_max_pages_per_file.  This is simply a best effort
+        * ra_max_read_ahead_whole_pages trigger RA on all pages in the
+        * file up to ra_max_pages_per_file.  This is simply a best effort
         * and only occurs once per open file. Normal RA behavior is reverted
         * to for subsequent IO.
         */
@@ -989,36 +998,38 @@ void ll_ras_enter(struct file *f, unsigned long pos, unsigned long count)
                            PAGE_SHIFT;
 
                CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
-                       ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages_per_file);
-
-                if (kms_pages &&
-                    kms_pages <= ra->ra_max_read_ahead_whole_pages) {
-                        ras->ras_window_start = 0;
-                       ras->ras_next_readahead = index + 1;
-                        ras->ras_window_len = min(ra->ra_max_pages_per_file,
-                                ra->ra_max_read_ahead_whole_pages);
+                      ra->ra_max_read_ahead_whole_pages,
+                      ra->ra_max_pages_per_file);
+
+               if (kms_pages &&
+                   kms_pages <= ra->ra_max_read_ahead_whole_pages) {
+                       ras->ras_window_start_idx = 0;
+                       ras->ras_next_readahead_idx = index + 1;
+                       ras->ras_window_pages = min(ra->ra_max_pages_per_file,
+                                           ra->ra_max_read_ahead_whole_pages);
                        ras->ras_no_miss_check = true;
-                        GOTO(out_unlock, 0);
-                }
-        }
+                       GOTO(out_unlock, 0);
+               }
+       }
        ras_detect_read_pattern(ras, sbi, pos, count, false);
 out_unlock:
        spin_unlock(&ras->ras_lock);
 }
 
 static bool index_in_stride_window(struct ll_readahead_state *ras,
-                                  unsigned int index)
+                                  pgoff_t index)
 {
-       unsigned long pos = index << PAGE_SHIFT;
-       unsigned long offset;
+       loff_t pos = (loff_t)index << PAGE_SHIFT;
 
        if (ras->ras_stride_length == 0 || ras->ras_stride_bytes == 0 ||
            ras->ras_stride_bytes == ras->ras_stride_length)
                return false;
 
        if (pos >= ras->ras_stride_offset) {
-               offset = (pos - ras->ras_stride_offset) %
-                                       ras->ras_stride_length;
+               u64 offset;
+
+               div64_u64_rem(pos - ras->ras_stride_offset,
+                             ras->ras_stride_length, &offset);
                if (offset < ras->ras_stride_bytes ||
                    ras->ras_stride_length - offset < PAGE_SIZE)
                        return true;
@@ -1030,14 +1041,13 @@ static bool index_in_stride_window(struct ll_readahead_state *ras,
 }
 
 /*
- * ll_ras_enter() is used to detect read pattern according to
- * pos and count.
+ * ll_ras_enter() is used to detect read pattern according to pos and count.
  *
  * ras_update() is used to detect cache miss and
  * reset window or increase window accordingly
  */
 static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
-                      struct ll_readahead_state *ras, unsigned long index,
+                      struct ll_readahead_state *ras, pgoff_t index,
                       enum ras_update_flags flags)
 {
        struct ll_ra_info *ra = &sbi->ll_ra_info;
@@ -1061,13 +1071,13 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
                GOTO(out_unlock, 0);
 
        if (flags & LL_RAS_MMAP)
-               ras_detect_read_pattern(ras, sbi, index << PAGE_SHIFT,
+               ras_detect_read_pattern(ras, sbi, (loff_t)index << PAGE_SHIFT,
                                        PAGE_SIZE, true);
 
-       if (!hit && ras->ras_window_len &&
-           index < ras->ras_next_readahead &&
-           pos_in_window(index, ras->ras_window_start, 0,
-                         ras->ras_window_len)) {
+       if (!hit && ras->ras_window_pages &&
+           index < ras->ras_next_readahead_idx &&
+           pos_in_window(index, ras->ras_window_start_idx, 0,
+                         ras->ras_window_pages)) {
                ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
                ras->ras_need_increase_window = false;
 
@@ -1087,8 +1097,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
                         * is still intersect with normal sequential
                         * read-ahead window.
                         */
-                       if (ras->ras_window_start <
-                           ras->ras_stride_offset)
+                       if (ras->ras_window_start_idx < ras->ras_stride_offset)
                                ras_stride_reset(ras);
                        RAS_CDEBUG(ras);
                } else {
@@ -1108,17 +1117,18 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
        if (stride_io_mode(ras)) {
                /* Since stride readahead is sentivite to the offset
                 * of read-ahead, so we use original offset here,
-                * instead of ras_window_start, which is RPC aligned */
-               ras->ras_next_readahead = max(index + 1,
-                                             ras->ras_next_readahead);
-               ras->ras_window_start =
-                               max(ras->ras_stride_offset >> PAGE_SHIFT,
-                                   ras->ras_window_start);
+                * instead of ras_window_start_idx, which is RPC aligned.
+                */
+               ras->ras_next_readahead_idx = max(index + 1,
+                                                 ras->ras_next_readahead_idx);
+               ras->ras_window_start_idx =
+                               max_t(pgoff_t, ras->ras_window_start_idx,
+                                     ras->ras_stride_offset >> PAGE_SHIFT);
        } else {
-               if (ras->ras_next_readahead < ras->ras_window_start)
-                       ras->ras_next_readahead = ras->ras_window_start;
+               if (ras->ras_next_readahead_idx < ras->ras_window_start_idx)
+                       ras->ras_next_readahead_idx = ras->ras_window_start_idx;
                if (!hit)
-                       ras->ras_next_readahead = index + 1;
+                       ras->ras_next_readahead_idx = index + 1;
        }
 
        if (ras->ras_need_increase_window) {
@@ -1236,7 +1246,7 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
        ENTRY;
 
        if (wbc->range_cyclic) {
-               start = mapping->writeback_index << PAGE_SHIFT;
+               start = (loff_t)mapping->writeback_index << PAGE_SHIFT;
                end = OBD_OBJECT_EOF;
        } else {
                start = wbc->range_start;
@@ -1423,8 +1433,8 @@ static int kickoff_async_readahead(struct file *file, unsigned long pages)
        struct ll_readahead_state *ras = &fd->fd_ras;
        struct ll_ra_info *ra = &sbi->ll_ra_info;
        unsigned long throttle;
-       unsigned long start = ras_align(ras, ras->ras_next_readahead, NULL);
-       unsigned long end = start + pages - 1;
+       pgoff_t start_idx = ras_align(ras, ras->ras_next_readahead_idx);
+       pgoff_t end_idx = start_idx + pages - 1;
 
        throttle = min(ra->ra_async_pages_per_file_threshold,
                       ra->ra_max_pages_per_file);
@@ -1434,24 +1444,24 @@ static int kickoff_async_readahead(struct file *file, unsigned long pages)
         * we do async readahead, allowing the user thread to do fast i/o.
         */
        if (stride_io_mode(ras) || !throttle ||
-           ras->ras_window_len < throttle)
+           ras->ras_window_pages < throttle)
                return 0;
 
        if ((atomic_read(&ra->ra_cur_pages) + pages) > ra->ra_max_pages)
                return 0;
 
-       if (ras->ras_async_last_readpage == start)
+       if (ras->ras_async_last_readpage_idx == start_idx)
                return 1;
 
        /* ll_readahead_work_free() free it */
        OBD_ALLOC_PTR(lrw);
        if (lrw) {
                lrw->lrw_file = get_file(file);
-               lrw->lrw_start = start;
-               lrw->lrw_end = end;
+               lrw->lrw_start_idx = start_idx;
+               lrw->lrw_end_idx = end_idx;
                spin_lock(&ras->ras_lock);
-               ras->ras_next_readahead = end + 1;
-               ras->ras_async_last_readpage = start;
+               ras->ras_next_readahead_idx = end_idx + 1;
+               ras->ras_async_last_readpage_idx = start_idx;
                spin_unlock(&ras->ras_lock);
                ll_readahead_work_add(inode, lrw);
        } else {
@@ -1485,7 +1495,7 @@ int ll_readpage(struct file *file, struct page *vmpage)
                struct ll_readahead_state *ras = &fd->fd_ras;
                struct lu_env  *local_env = NULL;
                unsigned long fast_read_pages =
-                       max(RA_REMAIN_WINDOW_MIN, ras->ras_rpc_size);
+                       max(RA_REMAIN_WINDOW_MIN, ras->ras_rpc_pages);
                struct vvp_page *vpg;
 
                result = -ENODATA;
@@ -1516,8 +1526,8 @@ int ll_readpage(struct file *file, struct page *vmpage)
                        /* Check if we can issue a readahead RPC, if that is
                         * the case, we can't do fast IO because we will need
                         * a cl_io to issue the RPC. */
-                       if (ras->ras_window_start + ras->ras_window_len <
-                           ras->ras_next_readahead + fast_read_pages ||
+                       if (ras->ras_window_start_idx + ras->ras_window_pages <
+                           ras->ras_next_readahead_idx + fast_read_pages ||
                            kickoff_async_readahead(file, fast_read_pages) > 0)
                                result = 0;
                }
index c960269..4f6eb0a 100644 (file)
@@ -114,10 +114,10 @@ struct vvp_io {
        struct kiocb            *vui_iocb;
 
        /* Readahead state. */
-       pgoff_t vui_ra_start;
-       pgoff_t vui_ra_count;
+       pgoff_t                 vui_ra_start_idx;
+       pgoff_t                 vui_ra_pages;
        /* Set when vui_ra_{start,count} have been initialized. */
-       bool            vui_ra_valid;
+       bool                    vui_ra_valid;
 };
 
 extern struct lu_device_type vvp_device_type;
index f0ce954..97366bb 100644 (file)
@@ -768,17 +768,17 @@ static void vvp_io_setattr_fini(const struct lu_env *env,
 static int vvp_io_read_start(const struct lu_env *env,
                             const struct cl_io_slice *ios)
 {
-       struct vvp_io           *vio   = cl2vvp_io(env, ios);
-       struct cl_io            *io    = ios->cis_io;
-       struct cl_object        *obj   = io->ci_obj;
-       struct inode            *inode = vvp_object_inode(obj);
-       struct ll_inode_info    *lli   = ll_i2info(inode);
-       struct file             *file  = vio->vui_fd->fd_file;
-       loff_t  pos = io->u.ci_rd.rd.crw_pos;
-       long    cnt = io->u.ci_rd.rd.crw_count;
-       long    tot = vio->vui_tot_count;
-       int     exceed = 0;
-       int     result;
+       struct vvp_io *vio = cl2vvp_io(env, ios);
+       struct cl_io *io = ios->cis_io;
+       struct cl_object *obj = io->ci_obj;
+       struct inode *inode = vvp_object_inode(obj);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct file *file = vio->vui_fd->fd_file;
+       loff_t pos = io->u.ci_rd.rd.crw_pos;
+       size_t cnt = io->u.ci_rd.rd.crw_count;
+       size_t tot = vio->vui_tot_count;
+       int exceed = 0;
+       int result;
        ENTRY;
 
        CLOBINVRNT(env, obj, vvp_object_invariant(obj));
@@ -816,15 +816,15 @@ static int vvp_io_read_start(const struct lu_env *env,
        /* initialize read-ahead window once per syscall */
        if (!vio->vui_ra_valid) {
                vio->vui_ra_valid = true;
-               vio->vui_ra_start = cl_index(obj, pos);
-               vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
+               vio->vui_ra_start_idx = cl_index(obj, pos);
+               vio->vui_ra_pages = cl_index(obj, tot + PAGE_SIZE - 1);
                /* If both start and end are unaligned, we read one more page
                 * than the index math suggests. */
-               if (pos % PAGE_SIZE != 0 && (pos + tot) % PAGE_SIZE != 0)
-                       vio->vui_ra_count++;
+               if ((pos & ~PAGE_MASK) != 0 && ((pos + tot) & ~PAGE_MASK) != 0)
+                       vio->vui_ra_pages++;
 
-               CDEBUG(D_READA, "tot %ld, ra_start %lu, ra_count %lu\n", tot,
-                      vio->vui_ra_start, vio->vui_ra_count);
+               CDEBUG(D_READA, "tot %zu, ra_start %lu, ra_count %lu\n",
+                      tot, vio->vui_ra_start_idx, vio->vui_ra_pages);
        }
 
        /* BUG: 5972 */
@@ -1545,7 +1545,7 @@ static int vvp_io_read_ahead(const struct lu_env *env,
                struct vvp_io *vio = cl2vvp_io(env, ios);
 
                if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-                       ra->cra_end = CL_PAGE_EOF;
+                       ra->cra_end_idx = CL_PAGE_EOF;
                        result = +1; /* no need to call down */
                }
        }
index 92b8ce5..36c83d4 100644 (file)
@@ -1044,7 +1044,8 @@ static int lov_io_read_ahead(const struct lu_env *env,
                              ra);
 
        CDEBUG(D_READA, DFID " cra_end = %lu, stripes = %d, rc = %d\n",
-              PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, r0->lo_nr, rc);
+              PFID(lu_object_fid(lov2lu(loo))), ra->cra_end_idx,
+                   r0->lo_nr, rc);
        if (rc != 0)
                RETURN(rc);
 
@@ -1056,29 +1057,29 @@ static int lov_io_read_ahead(const struct lu_env *env,
         */
 
        /* cra_end is stripe level, convert it into file level */
-       ra_end = ra->cra_end;
+       ra_end = ra->cra_end_idx;
        if (ra_end != CL_PAGE_EOF)
-               ra->cra_end = lov_stripe_pgoff(loo->lo_lsm, index,
-                                              ra_end, stripe);
+               ra->cra_end_idx = lov_stripe_pgoff(loo->lo_lsm, index,
+                                                  ra_end, stripe);
 
        /* boundary of current component */
        ra_end = cl_index(obj, (loff_t)lov_io_extent(lio, index)->e_end);
-       if (ra_end != CL_PAGE_EOF && ra->cra_end >= ra_end)
-               ra->cra_end = ra_end - 1;
+       if (ra_end != CL_PAGE_EOF && ra->cra_end_idx >= ra_end)
+               ra->cra_end_idx = ra_end - 1;
 
        if (r0->lo_nr == 1) /* single stripe file */
                RETURN(0);
 
        pps = lov_lse(loo, index)->lsme_stripe_size >> PAGE_SHIFT;
 
-       CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, index = %u, "
+       CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, index = %d, "
               "stripe_size = %u, stripe no = %u, start index = %lu\n",
-              PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, pps, index,
+              PFID(lu_object_fid(lov2lu(loo))), ra->cra_end_idx, pps, index,
               lov_lse(loo, index)->lsme_stripe_size, stripe, start);
 
        /* never exceed the end of the stripe */
-       ra->cra_end = min_t(pgoff_t,
-                           ra->cra_end, start + pps - start % pps - 1);
+       ra->cra_end_idx = min_t(pgoff_t, ra->cra_end_idx,
+                               start + pps - start % pps - 1);
        RETURN(0);
 }
 
index a640874..54ccb02 100644 (file)
@@ -1122,8 +1122,8 @@ static int mdc_io_read_ahead(const struct lu_env *env,
                ldlm_lock_decref(&lockh, dlmlock->l_req_mode);
        }
 
-       ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc;
-       ra->cra_end = CL_PAGE_EOF;
+       ra->cra_rpc_pages = osc_cli(osc)->cl_max_pages_per_rpc;
+       ra->cra_end_idx = CL_PAGE_EOF;
        ra->cra_release = osc_read_ahead_release;
        ra->cra_cbdata = dlmlock;
 
index e18d6f1..8bcb65f 100644 (file)
@@ -1511,7 +1511,7 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt,
                len = tail;
                offset = mbo->mbo_dom_size - len;
        }
-       LASSERT((offset % PAGE_SIZE) == 0);
+       LASSERT((offset & ~PAGE_MASK) == 0);
        rc = req_capsule_server_grow(pill, &RMF_NIOBUF_INLINE,
                                     sizeof(*rnb) + len);
        if (rc != 0) {
index 1d15cd5..e6528f1 100644 (file)
@@ -230,7 +230,7 @@ static void obd_t10_performance_test(const char *obd_name,
        for (start = jiffies, end = start + cfs_time_seconds(1) / 4,
             bcount = 0; time_before(jiffies, end) && rc == 0; bcount++) {
                rc = __obd_t10_performance_test(obd_name, cksum_type, page,
-                                               buf_len / PAGE_SIZE);
+                                               buf_len >> PAGE_SHIFT);
                if (rc)
                        break;
        }
index ff0652f..67e60b0 100644 (file)
@@ -1327,7 +1327,7 @@ static int osc_refresh_count(const struct lu_env *env,
                return 0;
        else if (cl_offset(obj, index + 1) > kms)
                /* catch sub-page write at end of file */
-               return kms % PAGE_SIZE;
+               return kms & ~PAGE_MASK;
        else
                return PAGE_SIZE;
 }
index 5a0d0b9..6a25750 100644 (file)
@@ -87,12 +87,12 @@ static int osc_io_read_ahead(const struct lu_env *env,
                        ldlm_lock_decref(&lockh, dlmlock->l_req_mode);
                }
 
-               ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc;
-               ra->cra_end = cl_index(osc2cl(osc),
-                                      dlmlock->l_policy_data.l_extent.end);
+               ra->cra_rpc_pages = osc_cli(osc)->cl_max_pages_per_rpc;
+               ra->cra_end_idx = cl_index(osc2cl(osc),
+                                          dlmlock->l_policy_data.l_extent.end);
                ra->cra_release = osc_read_ahead_release;
                ra->cra_cbdata = dlmlock;
-               if (ra->cra_end != CL_PAGE_EOF)
+               if (ra->cra_end_idx != CL_PAGE_EOF)
                        ra->cra_contention = true;
                result = 0;
        }
index 55b1fe4..0117810 100644 (file)
@@ -284,7 +284,7 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt,
                        } else if (lnb[i].lnb_data != NULL) {
                                int j, apages, abufsz;
                                abufsz = arc_buf_size(lnb[i].lnb_data);
-                               apages = abufsz / PAGE_SIZE;
+                               apages = abufsz >> PAGE_SHIFT;
                                /* these references to pages must be invalidated
                                 * to prevent access in osd_bufs_put() */
                                for (j = 0; j < apages; j++)
@@ -909,7 +909,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
                         * in this case it fallbacks to dmu_write() */
                        abufsz = arc_buf_size(lnb[i].lnb_data);
                        LASSERT(abufsz & PAGE_MASK);
-                       apages = abufsz / PAGE_SIZE;
+                       apages = abufsz >> PAGE_SHIFT;
                        LASSERT(i + apages <= npages);
                        /* these references to pages must be invalidated
                         * to prevent access in osd_bufs_put() */