Whamcloud - gitweb
LU-17383 statahead: quit statahead with a long time wait
author Qian Yingjin <qian@ddn.com>
Fri, 22 Dec 2023 09:16:07 +0000 (04:16 -0500)
committer Andreas Dilger <adilger@whamcloud.com>
Wed, 8 May 2024 06:00:53 +0000 (06:00 +0000)
If the thread has not done a stat() for longer than a time threshold
(@sbi->ll_sa_timeout, 30 seconds by default), then it probably does
not care too much about performance, or is no longer using this
directory.
In this case, stop the statahead thread once it has waited that long.

This patch also fixes defects reported by Coverity Scan for
Lustre.

Also add the lines about ll_sa_timeout in
https://review.whamcloud.com/41308

Lustre-change: https://review.whamcloud.com/53535
Lustre-commit: cfcba1ede861faec33d797e876a0fb11eab4332a

Fixes: e10bf68d7c3 ("LU-14361 statahead: regularized fname statahead pattern")
Test-Parameters: testlist=parallel-scale-nfsv4
Test-Parameters: testlist=parallel-scale-nfsv4
Test-Parameters: testlist=parallel-scale-nfsv4
Test-Parameters: testlist=parallel-scale-nfsv4
Signed-off-by: Qian Yingjin <qian@ddn.com>
Change-Id: Ia7c478268fe12eeefa6dfae1b3c94451f010d1d5
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Timothy Day <timday@amazon.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55014
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c
lustre/llite/statahead.c

index 1625c59..133c64b 100644 (file)
@@ -891,6 +891,12 @@ struct ll_sb_info {
                                                  * count */
        atomic_t                  ll_agl_total;  /* AGL thread started count */
 
+       /*
+        * stop the statahead thread if it is not doing a stat() in such time
+        * period as it probably does not care too much about performance or
+        * the user is no longer using this directory.
+        */
+       unsigned long             ll_sa_timeout;
        dev_t                     ll_sdev_orig; /* save s_dev before assign for
                                                 * clustred nfs */
        /* root squash */
@@ -1631,6 +1637,8 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
 #define LL_SA_RPC_DEF           32
 #define LL_SA_RPC_MAX           512
 
+#define LL_SA_TIMEOUT_DEF      30
+
 /* XXX: If want to support more concurrent statahead instances,
  *     please consider to decentralize the RPC lists attached
  *     on related import, such as imp_{sending,delayed}_list.
index 115ea83..be89fb8 100644 (file)
@@ -167,6 +167,7 @@ static struct ll_sb_info *ll_init_sbi(struct lustre_sb_info *lsi)
        /* metadata statahead is enabled by default */
        sbi->ll_sa_running_max = LL_SA_RUNNING_DEF;
        sbi->ll_sa_max = LL_SA_RPC_DEF;
+       sbi->ll_sa_timeout = LL_SA_TIMEOUT_DEF;
        atomic_set(&sbi->ll_sa_total, 0);
        atomic_set(&sbi->ll_sa_wrong, 0);
        atomic_set(&sbi->ll_sa_running, 0);
index 9c24359..1afe4b7 100644 (file)
@@ -1031,6 +1031,35 @@ static ssize_t statahead_max_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(statahead_max);
 
+/*
+ * sysfs "show" handler for statahead_timeout: print the per-superblock
+ * statahead wait timeout (sbi->ll_sa_timeout) as an unsigned decimal
+ * followed by a newline.
+ *
+ * Returns the number of bytes written into @buf.
+ */
+static ssize_t statahead_timeout_show(struct kobject *kobj,
+                                     struct attribute *attr,
+                                     char *buf)
+{
+       struct ll_sb_info *sbi;
+       unsigned long timeout;
+
+       /* @kobj is the kobject embedded in the sbi's ll_kset */
+       sbi = container_of(kobj, struct ll_sb_info, ll_kset.kobj);
+       timeout = sbi->ll_sa_timeout;
+
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", timeout);
+}
+
+/*
+ * sysfs "store" handler for statahead_timeout: parse an unsigned integer
+ * from @buffer (base auto-detected by kstrtoul()) and save it in
+ * sbi->ll_sa_timeout.
+ *
+ * Returns @count on success, the kstrtoul() error if @buffer is not a
+ * valid number, or -ERANGE if the value is too large to be converted to
+ * jiffies safely.
+ */
+static ssize_t statahead_timeout_store(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      const char *buffer,
+                                      size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned long val;
+       int rc;
+
+       rc = kstrtoul(buffer, 0, &val);
+       if (rc)
+               return rc;
+
+       /*
+        * The statahead thread passes cfs_time_seconds(ll_sa_timeout) to
+        * schedule_timeout(), which takes a signed long.  An unbounded
+        * value can overflow that conversion, making the wait expire at
+        * once and statahead be switched off, so reject anything whose
+        * jiffies equivalent would not fit comfortably.
+        *
+        * NOTE(review): 0 is still accepted; the waiter would then appear
+        * to time out immediately -- confirm whether 0 should instead mean
+        * "no timeout".
+        */
+       if (val > INT_MAX / HZ)
+               return -ERANGE;
+
+       sbi->ll_sa_timeout = val;
+       return count;
+}
+LUSTRE_RW_ATTR(statahead_timeout);
+
 static ssize_t statahead_agl_show(struct kobject *kobj,
                                  struct attribute *attr,
                                  char *buf)
@@ -2131,6 +2160,7 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_stats_track_gid.attr,
        &lustre_attr_statahead_running_max.attr,
        &lustre_attr_statahead_max.attr,
+       &lustre_attr_statahead_timeout.attr,
        &lustre_attr_statahead_agl.attr,
        &lustre_attr_lazystatfs.attr,
        &lustre_attr_statfs_max_age.attr,
index ac952a2..edb165a 100644 (file)
@@ -1057,7 +1057,10 @@ static int ll_statahead_thread(void *arg)
 
        ll_dir_chain_init(&chain);
        /* matches smp_store_release() in ll_deauthorize_statahead() */
-       while (pos != MDS_DIR_END_OFF && smp_load_acquire(&sai->sai_task)) {
+       while (pos != MDS_DIR_END_OFF &&
+              /* matches smp_store_release() in ll_deauthorize_statahead() */
+              smp_load_acquire(&sai->sai_task) &&
+              lli->lli_sa_enabled) {
                struct lu_dirpage *dp;
                struct lu_dirent  *ent;
 
@@ -1085,7 +1088,7 @@ static int ll_statahead_thread(void *arg)
                for (ent = lu_dirent_start(dp);
                     /* matches smp_store_release() in ll_deauthorize_statahead() */
                     ent != NULL && smp_load_acquire(&sai->sai_task) &&
-                    !sa_low_hit(sai);
+                    !sa_low_hit(sai) && lli->lli_sa_enabled;
                     ent = lu_dirent_next(ent)) {
                        __u64 hash;
                        int namelen;
@@ -1140,6 +1143,8 @@ static int ll_statahead_thread(void *arg)
                                 /* matches smp_store_release() in
                                  * ll_deauthorize_statahead() */
                                 smp_load_acquire(&sai->sai_task); })) {
+                               long timeout;
+
                                if (sa_has_callback(sai)) {
                                        __set_current_state(TASK_RUNNING);
                                        sa_handle_callback(sai);
@@ -1164,7 +1169,20 @@ static int ll_statahead_thread(void *arg)
 
                                if (!sa_sent_full(sai))
                                        break;
-                               schedule();
+
+                               /*
+                                * If the thread is not doing stat in
+                                * @sbi->ll_sa_timeout (30s) then it probably
+                                * does not care too much about performance,
+                                * or is no longer using this directory.
+                                * Stop the statahead thread in this case.
+                                */
+                               timeout = schedule_timeout(
+                                       cfs_time_seconds(sbi->ll_sa_timeout));
+                               if (timeout == 0) {
+                                       lli->lli_sa_enabled = 0;
+                                       break;
+                               }
                        }
                        __set_current_state(TASK_RUNNING);