LU-9574 llite: pipeline readahead better with large I/O
author Jinshan Xiong <jinshan.xiong@intel.com>
Thu, 1 Jun 2017 19:53:35 +0000 (12:53 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 21 Sep 2017 06:13:05 +0000 (06:13 +0000)
Fixed a bug where the next readahead is not set correctly when the
application issues large I/O;
extend the readahead window length to at least cover the size of the
current I/O.

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Change-Id: I43c5e4f25ea30d4a36263db2588bde0401122990
Reviewed-on: https://review.whamcloud.com/27388
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Patrick Farrell <paf@cray.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
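
For context, the following is a minimal standalone sketch (not the llite
code) of the policy this change moves to: once read-ahead has been issued
up to ra_end, the resume point is advanced unconditionally to ra_end + 1,
and the window is sized to cover at least the pages of the current I/O.
The struct and helper names (ra_state, ra_extend_window, ra_advance) are
hypothetical, chosen only for illustration.

#include <stdio.h>

struct ra_state {
	unsigned long next_readahead;	/* first page index to read ahead next */
	unsigned long window_start;	/* start of the read-ahead window */
	unsigned long window_len;	/* length of the window, in pages */
};

/* Extend the window so it covers at least the pages of the current I/O. */
static void ra_extend_window(struct ra_state *ras,
			     unsigned long io_start, unsigned long io_pages)
{
	unsigned long io_end = io_start + io_pages;

	if (ras->window_start + ras->window_len < io_end)
		ras->window_len = io_end - ras->window_start;
}

/* After issuing read-ahead up to and including ra_end, resume after it. */
static void ra_advance(struct ra_state *ras, unsigned long ra_end)
{
	if (ra_end > 0)
		ras->next_readahead = ra_end + 1;
}

int main(void)
{
	struct ra_state ras = {
		.next_readahead = 0,
		.window_start = 0,
		.window_len = 256,	/* e.g. 1 MiB worth of 4 KiB pages */
	};

	/* A 4 MiB read (1024 pages) starting at page 0. */
	ra_extend_window(&ras, 0, 1024);
	ra_advance(&ras, 1023);

	printf("window_len=%lu next_readahead=%lu\n",
	       ras.window_len, ras.next_readahead);
	return 0;
}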
lustre/llite/rw.c

index 4eece87..c08c519 100644
@@ -488,9 +488,6 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
                        end = end_index;
                        ria->ria_eof = true;
                }
-
-               ras->ras_next_readahead = max(end, end + 1);
-               RAS_CDEBUG(ras);
         }
         ria->ria_start = start;
         ria->ria_end = end;
@@ -512,6 +509,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
                RETURN(0);
        }
 
+       RAS_CDEBUG(ras);
        CDEBUG(D_READA, DFID": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
               PFID(lu_object_fid(&clob->co_lu)),
               ria->ria_start, ria->ria_end,
@@ -549,24 +547,18 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
        if (ra_end == end && ra_end == (kms >> PAGE_SHIFT))
                ll_ra_stats_inc(inode, RA_STAT_EOF);
 
-       /* if we didn't get to the end of the region we reserved from
-        * the ras we need to go back and update the ras so that the
-        * next read-ahead tries from where we left off.  we only do so
-        * if the region we failed to issue read-ahead on is still ahead
-        * of the app and behind the next index to start read-ahead from */
        CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
               ra_end, end, ria->ria_end, ret);
 
-       if (ra_end > 0 && ra_end != end) {
+       if (ra_end != end)
                ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
+       if (ra_end > 0) {
+               /* update the ras so that the next read-ahead tries from
+                * where we left off. */
                spin_lock(&ras->ras_lock);
-               if (ra_end <= ras->ras_next_readahead &&
-                   index_in_window(ra_end, ras->ras_window_start, 0,
-                                   ras->ras_window_len)) {
-                       ras->ras_next_readahead = ra_end + 1;
-                       RAS_CDEBUG(ras);
-               }
+               ras->ras_next_readahead = ra_end + 1;
                spin_unlock(&ras->ras_lock);
+               RAS_CDEBUG(ras);
        }
 
        RETURN(ret);
@@ -847,7 +839,8 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
                /* Since stride readahead is sentivite to the offset
                 * of read-ahead, so we use original offset here,
                 * instead of ras_window_start, which is RPC aligned */
-               ras->ras_next_readahead = max(index, ras->ras_next_readahead);
+               ras->ras_next_readahead = max(index + 1,
+                                             ras->ras_next_readahead);
                ras->ras_window_start = max(ras->ras_stride_offset,
                                            ras->ras_window_start);
        } else {
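
To make the last hunk concrete, here is a tiny sketch (a hypothetical
helper, not ras_update() itself) of the off-by-one change in the stride
read-ahead path: the page at index has already been consumed by the
application, so the next read-ahead should resume at index + 1 rather
than at index.

#include <stdio.h>

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

int main(void)
{
	unsigned long next_readahead = 100;	/* current resume point */
	unsigned long index = 100;		/* page the app just read */

	/* Old behaviour: max(index, next) == 100, page 100 gets re-read.  */
	/* New behaviour: resume strictly after the page just consumed.    */
	next_readahead = max_ul(index + 1, next_readahead);

	printf("next_readahead=%lu\n", next_readahead);	/* prints 101 */
	return 0;
}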