Whamcloud - gitweb
LU-11548 llite: increase readahead default values 00/33400/6
author Andreas Dilger <adilger@whamcloud.com>
Fri, 19 Oct 2018 07:31:41 +0000 (01:31 -0600)
committer Oleg Drokin <green@whamcloud.com>
Fri, 2 Oct 2020 00:18:58 +0000 (00:18 +0000)
It is commonly recommended to increase the readahead tunables
for clients to increase performance, since the current defaults
are too small, having been set several years ago for slower
networks and servers.

Increase the readahead defaults to better match values that are
recommended today:
- read_ahead_max_mb increased from 64MB to 1GB by default,
  or 1/32 RAM, whichever is less
- read_ahead_per_file_max_mb is increased from 64MB to 256MB,
  or 1/4 of read_ahead_max_mb, whichever is less

Modify the constant names to better match the variable and /proc
filenames.

Fix sanity test_101g to allow async readahead to generate one extra
read RPC, as long as it is of the expected (full) size.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Iec864788fa1979c27adad42e613d1bf03f3ebbe5
Reviewed-on: https://review.whamcloud.com/33400
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Reviewed-by: Yingjin Qian <qian@ddn.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/tests/sanity.sh

index 38d7495..2cb186f 100644 (file)
@@ -239,16 +239,16 @@ struct ll_inode_info {
                };
        };
 
                };
        };
 
-        /* XXX: For following frequent used members, although they maybe special
-         *      used for non-directory object, it is some time-wasting to check
-         *      whether the object is directory or not before using them. On the
-         *      other hand, currently, sizeof(f) > sizeof(d), it cannot reduce
-         *      the "ll_inode_info" size even if moving those members into u.f.
-         *      So keep them out side.
-         *
-         *      In the future, if more members are added only for directory,
-         *      some of the following members can be moved into u.f.
-         */
+       /* XXX: For following frequent used members, although they maybe special
+        *      used for non-directory object, it is some time-wasting to check
+        *      whether the object is directory or not before using them. On the
+        *      other hand, currently, sizeof(f) > sizeof(d), it cannot reduce
+        *      the "ll_inode_info" size even if moving those members into u.f.
+        *      So keep them out side.
+        *
+        *      In the future, if more members are added only for directory,
+        *      some of the following members can be moved into u.f.
+        */
        struct cl_object                *lli_clob;
 
        /* mutex to request for layout lock exclusively. */
        struct cl_object                *lli_clob;
 
        /* mutex to request for layout lock exclusively. */
@@ -433,10 +433,10 @@ static inline bool ll_file_test_and_clear_flag(struct ll_inode_info *lli,
 int ll_xattr_cache_destroy(struct inode *inode);
 
 int ll_xattr_cache_get(struct inode *inode,
 int ll_xattr_cache_destroy(struct inode *inode);
 
 int ll_xattr_cache_get(struct inode *inode,
-                       const char *name,
-                       char *buffer,
-                       size_t size,
-                       __u64 valid);
+                      const char *name,
+                      char *buffer,
+                      size_t size,
+                      __u64 valid);
 
 int ll_xattr_cache_insert(struct inode *inode,
                          const char *name,
 
 int ll_xattr_cache_insert(struct inode *inode,
                          const char *name,
@@ -447,7 +447,7 @@ static inline bool obd_connect_has_secctx(struct obd_connect_data *data)
 {
 #ifdef CONFIG_SECURITY
        return data->ocd_connect_flags & OBD_CONNECT_FLAGS2 &&
 {
 #ifdef CONFIG_SECURITY
        return data->ocd_connect_flags & OBD_CONNECT_FLAGS2 &&
-              data->ocd_connect_flags2 & OBD_CONNECT2_FILE_SECCTX;
+               data->ocd_connect_flags2 & OBD_CONNECT2_FILE_SECCTX;
 #else
        return false;
 #endif
 #else
        return false;
 #endif
@@ -509,8 +509,11 @@ static inline struct pcc_inode *ll_i2pcci(struct inode *inode)
 /* default to use at least 16M for fast read if possible */
 #define RA_REMAIN_WINDOW_MIN                   MiB_TO_PAGES(16UL)
 
 /* default to use at least 16M for fast read if possible */
 #define RA_REMAIN_WINDOW_MIN                   MiB_TO_PAGES(16UL)
 
-/* default readahead on a given system. */
-#define SBI_DEFAULT_READ_AHEAD_MAX             MiB_TO_PAGES(64UL)
+/* default read-ahead on a given client mountpoint. */
+#define SBI_DEFAULT_READ_AHEAD_MAX             MiB_TO_PAGES(1024UL)
+
+/* default read-ahead for a single file descriptor */
+#define SBI_DEFAULT_READ_AHEAD_PER_FILE_MAX    MiB_TO_PAGES(256UL)
 
 /* default read-ahead full files smaller than limit on the second read */
 #define SBI_DEFAULT_READ_AHEAD_WHOLE_MAX       MiB_TO_PAGES(2UL)
 
 /* default read-ahead full files smaller than limit on the second read */
 #define SBI_DEFAULT_READ_AHEAD_WHOLE_MAX       MiB_TO_PAGES(2UL)
index a3b4a04..2547302 100644 (file)
@@ -129,11 +129,13 @@ static struct ll_sb_info *ll_init_sbi(void)
        if (sbi->ll_cache == NULL)
                GOTO(out_destroy_ra, rc = -ENOMEM);
 
        if (sbi->ll_cache == NULL)
                GOTO(out_destroy_ra, rc = -ENOMEM);
 
-       sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
-                                                   SBI_DEFAULT_READ_AHEAD_MAX);
+       sbi->ll_ra_info.ra_max_pages =
+               min(pages / 32, SBI_DEFAULT_READ_AHEAD_MAX);
+       sbi->ll_ra_info.ra_max_pages_per_file =
+               min(sbi->ll_ra_info.ra_max_pages / 4,
+                   SBI_DEFAULT_READ_AHEAD_PER_FILE_MAX);
        sbi->ll_ra_info.ra_async_pages_per_file_threshold =
                                sbi->ll_ra_info.ra_max_pages_per_file;
        sbi->ll_ra_info.ra_async_pages_per_file_threshold =
                                sbi->ll_ra_info.ra_max_pages_per_file;
-       sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
        sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
        atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
 
        sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
        atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
 
index b4e016d..924d05a 100755 (executable)
@@ -9551,12 +9551,11 @@ test_101g_brw_size_test() {
                sed -n '/pages per rpc/,/^$/p' |
                awk '/'$pages':/ { reads += $2; writes += $6 }; \
                END { print reads,writes }'))
                sed -n '/pages per rpc/,/^$/p' |
                awk '/'$pages':/ { reads += $2; writes += $6 }; \
                END { print reads,writes }'))
-       [ ${rpcs[0]} -ne $count ] && error "${rpcs[0]} != $count read RPCs" &&
-               return 5
-       [ ${rpcs[1]} -ne $count ] && error "${rpcs[1]} != $count write RPCs" &&
-               return 6
-
-       return 0
+       # allow one extra full-sized read RPC for async readahead
+       [[ ${rpcs[0]} == $count || ${rpcs[0]} == $((count + 1)) ]] ||
+               { error "${rpcs[0]} != $count read RPCs"; return 5; }
+       [[ ${rpcs[1]} == $count ]] ||
+               { error "${rpcs[1]} != $count write RPCs"; return 6; }
 }
 
 test_101g() {
 }
 
 test_101g() {