There are two significant issues with whole file read.
1. Whole file read does not interact correctly with fast
reads - specifically, whole file read is not recognized by
the fast read code so files below the
"max_read_ahead_whole_mb" limit will not use fast reads.
This has a significant performance impact.
2. Whole file read does not start from the beginning of the
file, it starts from the current IO index. This causes
issues with unusual IO patterns, and can also confuse
readahead more generally (I admit to not fully understanding
what happens here, but the change is reasonable regardless.)
This is particularly important for cases where the read
doesn't start at the beginning of the file but still reads
the whole file (e.g., random or backwards reads).
Performance data:
max_read_ahead_whole_mb defaults to 64 MiB, so a 64 MiB
file is read with whole-file readahead, and a 65 MiB file is not.
Without this fix:
rm -f file
truncate -s 64M file
dd if=file bs=4K of=/dev/null
67108864 bytes (67 MB, 64 MiB) copied, 7.40127 s, 9.1 MB/s
rm -f file
truncate -s 65M file
dd if=file bs=4K of=/dev/null
68157440 bytes (68 MB, 65 MiB) copied, 0.0932216 s, 630 MB/s
Whole file readahead: 9.1 MB/s
Non whole file readahead: 630 MB/s
With this fix (same test as above):
Whole file readahead: 994 MB/s
Non whole file readahead: 630 MB/s (unchanged)
Fixes: 7864a68 ("LU-12043 llite,readahead: don't always use max RPC size")
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I72f0b58e289e83a2f2a3868ef0d433a50889d4c0
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54011
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: Shuichi Ihara <sihara@ddn.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
pgoff_t ras_async_last_readpage_idx;
/* whether we should increase readahead window */
bool ras_need_increase_window;
- /* whether ra miss check should be skipped */
- bool ras_no_miss_check;
+ /* we're attempting to read the whole file */
+ bool ras_whole_file_read;
};
struct ll_readahead_work {
struct ll_file_data *fd = f->private_data;
struct ll_readahead_state *ras = &fd->fd_ras;
struct inode *inode = file_inode(f);
- unsigned long index = pos >> PAGE_SHIFT;
struct ll_sb_info *sbi = ll_i2sbi(inode);
spin_lock(&ras->ras_lock);
ras->ras_requests++;
ras->ras_consecutive_requests++;
ras->ras_need_increase_window = false;
- ras->ras_no_miss_check = false;
+ ras->ras_whole_file_read = false;
/*
* On the second access to a file smaller than the tunable
* ra_max_read_ahead_whole_pages trigger RA on all pages in the
if (kms_pages &&
kms_pages <= ra->ra_max_read_ahead_whole_pages) {
+ ras->ras_whole_file_read = true;
ras->ras_window_start_idx = 0;
- ras->ras_next_readahead_idx = index + 1;
+ ras->ras_next_readahead_idx = 0;
ras->ras_window_pages = min(ra->ra_max_pages_per_file,
ra->ra_max_read_ahead_whole_pages);
- ras->ras_no_miss_check = true;
GOTO(out_unlock, 0);
}
}
* Because we will read whole file to page cache even if
* some pages missed.
*/
- if (ras->ras_no_miss_check)
+ if (ras->ras_whole_file_read)
GOTO(out_unlock, 0);
if (io && io->ci_rand_read)
skip_pages = fast_read_pages;
}
- if (ras->ras_window_start_idx + ras->ras_window_pages <
+ RAS_CDEBUG(ras);
+
+ if (ras->ras_whole_file_read ||
+ ras->ras_window_start_idx + ras->ras_window_pages <
ras->ras_next_readahead_idx + skip_pages ||
- kickoff_async_readahead(file, fast_read_pages) > 0)
+ kickoff_async_readahead(file, fast_read_pages) > 0) {
return true;
+ }
return false;
}
page = cl_vmpage_page(vmpage, clob);
if (page == NULL) {
unlock_page(vmpage);
+ CDEBUG(D_READA, "fast read: failed to find page %ld\n",
+ vmpage->index);
ll_ra_stats_inc_sbi(sbi, RA_STAT_FAILED_FAST_READ);
RETURN(result);
}
}
run_test 248b "test short_io read and write for both small and large sizes"
+test_248c() {
+ $LCTL set_param llite.*.read_ahead_stats=c
+ # Regression test for the whole-file readahead fix: compare whole-file
+ # readahead against non-whole-file readahead on the same 64 MiB file.
+ # Whole-file readahead should be consistently slightly faster; the bug
+ # made it 80-100x slower.  To account for possible test-system lag we
+ # only require whole-file time <= 1.5x the non-whole-file time, and
+ # require 3 failures in a row before failing the test.
+ local time1
+ local time2
+ # save current tunable so it can be restored on exit
+ local whole_mb=$($LCTL get_param -n llite.*.max_read_ahead_whole_mb)
+ stack_trap "$LCTL set_param llite.*.max_read_ahead_whole_mb=$whole_mb" EXIT
+ # NOTE(review): counter (and timecheck below) are not declared local,
+ # so they leak into the caller's shell scope -- confirm this is intended
+ counter=0
+
+ while [ $counter -lt 3 ]; do
+ # Increment the counter
+ ((counter++))
+
+ # limit 64 MiB: the 64 MiB file below qualifies for whole-file readahead
+ $LCTL set_param llite.*.max_read_ahead_whole_mb=64
+ rm -f $DIR/$tfile
+ touch $DIR/$tfile || error "(0) failed to create file"
+ # 64 MiB sparse file
+ $TRUNCATE $DIR/$tfile 67108864 || error "(1) failed to truncate file"
+ # field 8 of dd's summary line is the elapsed time in seconds
+ time1=$(dd if=$DIR/$tfile bs=4K of=/dev/null 2>&1 | awk '/bytes/ {print $8}')
+ echo "whole file readahead of 64 MiB took $time1 seconds"
+ $LCTL get_param llite.*.read_ahead_stats
+
+ $LCTL set_param llite.*.read_ahead_stats=c
+ # limit 8 MiB: the same 64 MiB file no longer qualifies for whole-file
+ $LCTL set_param llite.*.max_read_ahead_whole_mb=8
+ rm -f $DIR/$tfile
+ touch $DIR/$tfile || error "(2) failed to create file"
+ # 64 MiB sparse file
+ $TRUNCATE $DIR/$tfile 67108864 || error "(3) failed to create file"
+ time2=$(dd if=$DIR/$tfile bs=4K of=/dev/null 2>&1 | awk '/bytes/ {print $8}')
+ echo "non-whole file readahead of 64 MiB took $time2 seconds"
+ $LCTL get_param llite.*.read_ahead_stats
+
+ # Check that time1 is not more than 1.5 * time2 (bc prints 1 if true)
+ timecheck=$(echo "$time1 <= (1.5 * $time2)" | bc -l)
+
+ if [[ $timecheck -eq 1 ]]; then
+ echo "Test passed on attempt $counter"
+ # Exit the loop by pushing counter past the retry limit
+ counter=4
+ else
+ echo "Attempt $counter failed: whole file readahead took: $time1, greater than non: $time2"
+ fi
+ done
+ # counter is still 3 only if all three attempts failed
+ if [ $counter -eq 3 ]; then
+ error "whole file readahead check failed 3 times in a row, probably not just VM lag"
+ fi
+
+}
+run_test 248c "verify whole file read behavior"
+
test_249() { # LU-7890
[ $MDS1_VERSION -lt $(version_code 2.8.53) ] &&
skip "Need at least version 2.8.54"