From 51e34e05f81f1860348db4aae2d8b985f42abeed Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Fri, 22 Dec 2023 04:16:07 -0500 Subject: [PATCH] LU-17383 statahead: quit statahead with a long time wait If the thread is not doing stat for more than a time threshold (@sbi->ll_sa_timeout, 30 seconds by default) then it probably does not care too much about performance, or is no longer using this directory. Quit the statahead thread after such a long wait. This patch also fixes defects reported by Coverity Scan for Lustre. Also add the lines about ll_sa_timeout in https://review.whamcloud.com/41308 Lustre-change: https://review.whamcloud.com/53535 Lustre-commit: cfcba1ede861faec33d797e876a0fb11eab4332a Fixes: e10bf68d7c3 ("LU-14361 statahead: regularized fname statahead pattern") Test-Parameters: testlist=parallel-scale-nfsv4 Test-Parameters: testlist=parallel-scale-nfsv4 Test-Parameters: testlist=parallel-scale-nfsv4 Test-Parameters: testlist=parallel-scale-nfsv4 Signed-off-by: Qian Yingjin Change-Id: Ia7c478268fe12eeefa6dfae1b3c94451f010d1d5 Reviewed-by: Lai Siyao Reviewed-by: Andreas Dilger Reviewed-by: Timothy Day Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55014 Tested-by: jenkins Tested-by: Maloo --- lustre/llite/llite_internal.h | 8 ++++++++ lustre/llite/llite_lib.c | 1 + lustre/llite/lproc_llite.c | 30 ++++++++++++++++++++++++++++++ lustre/llite/statahead.c | 24 +++++++++++++++++++++--- 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 1625c59..133c64bf 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -891,6 +891,12 @@ struct ll_sb_info { * count */ atomic_t ll_agl_total; /* AGL thread started count */ + /* + * Timeout in seconds: stop the statahead thread if no stat() is done + * within this period, as the user probably does not care too much + * about performance or is no longer using this directory.
+ */ + unsigned long ll_sa_timeout; dev_t ll_sdev_orig; /* save s_dev before assign for * clustred nfs */ /* root squash */ @@ -1631,6 +1637,8 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which); #define LL_SA_RPC_DEF 32 #define LL_SA_RPC_MAX 512 +#define LL_SA_TIMEOUT_DEF 30 + /* XXX: If want to support more concurrent statahead instances, * please consider to decentralize the RPC lists attached * on related import, such as imp_{sending,delayed}_list. diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 115ea83..be89fb8 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -167,6 +167,7 @@ static struct ll_sb_info *ll_init_sbi(struct lustre_sb_info *lsi) /* metadata statahead is enabled by default */ sbi->ll_sa_running_max = LL_SA_RUNNING_DEF; sbi->ll_sa_max = LL_SA_RPC_DEF; + sbi->ll_sa_timeout = LL_SA_TIMEOUT_DEF; atomic_set(&sbi->ll_sa_total, 0); atomic_set(&sbi->ll_sa_wrong, 0); atomic_set(&sbi->ll_sa_running, 0); diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 9c24359..1afe4b7 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -1031,6 +1031,35 @@ static ssize_t statahead_max_store(struct kobject *kobj, } LUSTRE_RW_ATTR(statahead_max); +static ssize_t statahead_timeout_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, + ll_kset.kobj); + + return scnprintf(buf, PAGE_SIZE, "%lu\n", sbi->ll_sa_timeout); +} + +static ssize_t statahead_timeout_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t count) +{ + struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, + ll_kset.kobj); + unsigned long val; + int rc; + + rc = kstrtoul(buffer, 0, &val); + if (rc) + return rc; + + sbi->ll_sa_timeout = val; + return count; +} +LUSTRE_RW_ATTR(statahead_timeout); + static ssize_t statahead_agl_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ 
-2131,6 +2160,7 @@ static struct attribute *llite_attrs[] = { &lustre_attr_stats_track_gid.attr, &lustre_attr_statahead_running_max.attr, &lustre_attr_statahead_max.attr, + &lustre_attr_statahead_timeout.attr, &lustre_attr_statahead_agl.attr, &lustre_attr_lazystatfs.attr, &lustre_attr_statfs_max_age.attr, diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index ac952a2..edb165a 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -1057,7 +1057,10 @@ static int ll_statahead_thread(void *arg) ll_dir_chain_init(&chain); /* matches smp_store_release() in ll_deauthorize_statahead() */ - while (pos != MDS_DIR_END_OFF && smp_load_acquire(&sai->sai_task)) { + while (pos != MDS_DIR_END_OFF && + /* matches smp_store_release() in ll_deauthorize_statahead() */ + smp_load_acquire(&sai->sai_task) && + lli->lli_sa_enabled) { struct lu_dirpage *dp; struct lu_dirent *ent; @@ -1085,7 +1088,7 @@ static int ll_statahead_thread(void *arg) for (ent = lu_dirent_start(dp); /* matches smp_store_release() in ll_deauthorize_statahead() */ ent != NULL && smp_load_acquire(&sai->sai_task) && - !sa_low_hit(sai); + !sa_low_hit(sai) && lli->lli_sa_enabled; ent = lu_dirent_next(ent)) { __u64 hash; int namelen; @@ -1140,6 +1143,8 @@ static int ll_statahead_thread(void *arg) /* matches smp_store_release() in * ll_deauthorize_statahead() */ smp_load_acquire(&sai->sai_task); })) { + long timeout; + if (sa_has_callback(sai)) { __set_current_state(TASK_RUNNING); sa_handle_callback(sai); @@ -1164,7 +1169,20 @@ static int ll_statahead_thread(void *arg) if (!sa_sent_full(sai)) break; - schedule(); + + /* + * If the thread is not doing stat within + * @sbi->ll_sa_timeout seconds (30 by default), + * it probably does not care too much about + * performance or is no longer using the + * directory; stop the statahead thread then.
+ */ + timeout = schedule_timeout( + cfs_time_seconds(sbi->ll_sa_timeout)); + if (timeout == 0) { + lli->lli_sa_enabled = 0; + break; + } } __set_current_state(TASK_RUNNING); -- 1.8.3.1