If the thread has not done a stat() for longer than a time threshold
(@sbi->ll_sa_timeout, 30 seconds by default), then it probably does
not care too much about performance, or is no longer using this
directory.
In this case, stop the statahead thread once it has waited that long.
This patch also fixes defects reported by Coverity Scan for
Lustre.
Also add the lines about ll_sa_timeout in
https://review.whamcloud.com/41308
Lustre-change: https://review.whamcloud.com/53535
Lustre-commit: cfcba1ede861faec33d797e876a0fb11eab4332a
Fixes: e10bf68d7c3 ("LU-14361 statahead: regularized fname statahead pattern")
Test-Parameters: testlist=parallel-scale-nfsv4
Test-Parameters: testlist=parallel-scale-nfsv4
Test-Parameters: testlist=parallel-scale-nfsv4
Test-Parameters: testlist=parallel-scale-nfsv4
Signed-off-by: Qian Yingjin <qian@ddn.com>
Change-Id: Ia7c478268fe12eeefa6dfae1b3c94451f010d1d5
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Timothy Day <timday@amazon.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55014
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
* count */
atomic_t ll_agl_total; /* AGL thread started count */
+ /*
+ * Timeout in seconds: stop the statahead thread if it has not done a
+ * stat() within this period, as it probably does not care too much
+ * about performance or the user is no longer using this directory.
+ */
+ unsigned long ll_sa_timeout;
dev_t ll_sdev_orig; /* save s_dev before assign for
* clustred nfs */
/* root squash */
#define LL_SA_RPC_DEF 32
#define LL_SA_RPC_MAX 512
+#define LL_SA_TIMEOUT_DEF 30
+
/* XXX: If want to support more concurrent statahead instances,
* please consider to decentralize the RPC lists attached
* on related import, such as imp_{sending,delayed}_list.
/* metadata statahead is enabled by default */
sbi->ll_sa_running_max = LL_SA_RUNNING_DEF;
sbi->ll_sa_max = LL_SA_RPC_DEF;
+ sbi->ll_sa_timeout = LL_SA_TIMEOUT_DEF;
atomic_set(&sbi->ll_sa_total, 0);
atomic_set(&sbi->ll_sa_wrong, 0);
atomic_set(&sbi->ll_sa_running, 0);
}
LUSTRE_RW_ATTR(statahead_max);
+static ssize_t statahead_timeout_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ /* Show handler: print sbi->ll_sa_timeout into buf as an unsigned decimal plus newline. */
+ return scnprintf(buf, PAGE_SIZE, "%lu\n", sbi->ll_sa_timeout);
+}
+
+static ssize_t statahead_timeout_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ unsigned long val;
+ int rc;
+ /* Store handler: parse buffer as an unsigned long; base 0 lets kstrtoul() detect the radix. */
+ rc = kstrtoul(buffer, 0, &val);
+ if (rc)
+ return rc; /* parse failure: hand kstrtoul()'s error code back to the caller */
+ /*
+ * NOTE(review): val is stored without any range check. The statahead
+ * thread passes it to cfs_time_seconds() and treats a
+ * schedule_timeout() return of 0 as expiry, so 0 or very large
+ * values may misbehave - confirm whether bounds are needed.
+ */
+ sbi->ll_sa_timeout = val;
+ return count; /* success: report the whole input as consumed */
+}
+LUSTRE_RW_ATTR(statahead_timeout);
+
static ssize_t statahead_agl_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
&lustre_attr_stats_track_gid.attr,
&lustre_attr_statahead_running_max.attr,
&lustre_attr_statahead_max.attr,
+ &lustre_attr_statahead_timeout.attr,
&lustre_attr_statahead_agl.attr,
&lustre_attr_lazystatfs.attr,
&lustre_attr_statfs_max_age.attr,
ll_dir_chain_init(&chain);
/* matches smp_store_release() in ll_deauthorize_statahead() */
- while (pos != MDS_DIR_END_OFF && smp_load_acquire(&sai->sai_task)) {
+ while (pos != MDS_DIR_END_OFF &&
+ /* matches smp_store_release() in ll_deauthorize_statahead() */
+ smp_load_acquire(&sai->sai_task) &&
+ lli->lli_sa_enabled) {
struct lu_dirpage *dp;
struct lu_dirent *ent;
for (ent = lu_dirent_start(dp);
/* matches smp_store_release() in ll_deauthorize_statahead() */
ent != NULL && smp_load_acquire(&sai->sai_task) &&
- !sa_low_hit(sai);
+ !sa_low_hit(sai) && lli->lli_sa_enabled;
ent = lu_dirent_next(ent)) {
__u64 hash;
int namelen;
/* matches smp_store_release() in
* ll_deauthorize_statahead() */
smp_load_acquire(&sai->sai_task); })) {
+ long timeout;
+
if (sa_has_callback(sai)) {
__set_current_state(TASK_RUNNING);
sa_handle_callback(sai);
if (!sa_sent_full(sai))
break;
- schedule();
+
+ /*
+ * If the thread is not doing stat within
+ * @sbi->ll_sa_timeout seconds (30 by default) then
+ * it probably does not care too much about
+ * performance, or is no longer using this directory.
+ * Stop the statahead thread in this case.
+ */
+ timeout = schedule_timeout(
+ cfs_time_seconds(sbi->ll_sa_timeout));
+ if (timeout == 0) {
+ lli->lli_sa_enabled = 0;
+ break;
+ }
}
__set_current_state(TASK_RUNNING);