Whamcloud - gitweb
LU-12368 obdclass: don't send multiple statfs RPCs 80/35380/5
authorAndreas Dilger <adilger@whamcloud.com>
Sat, 29 Jun 2019 01:10:41 +0000 (19:10 -0600)
committerOleg Drokin <green@whamcloud.com>
Fri, 12 Jul 2019 05:22:50 +0000 (05:22 +0000)
If multiple threads are racing to send a non-cached OST_STATFS or
MDS_STATFS RPC, this can cause a significant RPC storm for systems
with many-core clients and many OSTs due to amplification of the
requests, and the fact that STATFS RPCs are sent asynchronously.
Some logs have shown a few 96-core clients with 20k+ OST_STATFS RPCs
in flight concurrently, which can overload the network if many OSTs
are on the same OSS nodes (osc.*.max_rpcs_in_flight is per OST).

This was not previously a significant issue when core counts were
smaller on the clients, or with fewer OSTs per OSS.

If a thread can't use the cached statfs values, limit statfs to one
thread at a time, since the thread(s) would be blocked waiting for
the RPC replies anyway, which can't finish faster if many are sent.

Also add a llite.*.statfs_max_age parameter that can be tuned
to control the maximum age (in seconds) of the statfs cache.  This
can avoid overhead for workloads that are statfs heavy, given that
the filesystem is _probably_ not running out of space this second,
and even so "statfs" does not guarantee space in parallel workloads.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I95690e37aecbac08ac5768a5e5c6c70ca258a832
Reviewed-on: https://review.whamcloud.com/35380
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd.h
lustre/include/obd_class.h
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c

index 7c572cb..368dda1 100644 (file)
@@ -380,6 +380,8 @@ struct echo_client_obd {
 
 /* allow statfs data caching for 1 second */
 #define OBD_STATFS_CACHE_SECONDS 1
+/* arbitrary maximum. larger would be useless, allows catching bogus input */
+#define OBD_STATFS_CACHE_MAX_AGE 3600 /* seconds */
 
 #define lov_tgt_desc lu_tgt_desc
 
index 695ef46..dd83ca6 100644 (file)
@@ -1058,13 +1058,31 @@ static inline int obd_statfs(const struct lu_env *env, struct obd_export *exp,
        if (obd->obd_osfs_age < max_age ||
            ((obd->obd_osfs.os_state & OS_STATE_SUM) &&
             !(flags & OBD_STATFS_SUM))) {
-               rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
+               bool update_age = false;
+               /* the RPC will block anyway, so avoid sending many at once */
+               rc = mutex_lock_interruptible(&obd->obd_dev_mutex);
+               if (rc)
+                       RETURN(rc);
+               if (obd->obd_osfs_age < max_age ||
+                   ((obd->obd_osfs.os_state & OS_STATE_SUM) &&
+                    !(flags & OBD_STATFS_SUM))) {
+                       rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
+                       update_age = true;
+               } else {
+                       CDEBUG(D_SUPER,
+                              "%s: new %p cache blocks %llu/%llu objects %llu/%llu\n",
+                              obd->obd_name, &obd->obd_osfs,
+                              obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks,
+                              obd->obd_osfs.os_ffree, obd->obd_osfs.os_files);
+               }
                if (rc == 0) {
                        spin_lock(&obd->obd_osfs_lock);
                        memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
-                       obd->obd_osfs_age = ktime_get_seconds();
+                       if (update_age)
+                               obd->obd_osfs_age = ktime_get_seconds();
                        spin_unlock(&obd->obd_osfs_lock);
                }
+               mutex_unlock(&obd->obd_dev_mutex);
        } else {
                CDEBUG(D_SUPER,
                       "%s: use %p cache blocks %llu/%llu objects %llu/%llu\n",
index ce84e89..94af886 100644 (file)
@@ -595,6 +595,9 @@ struct ll_sb_info {
        /* st_blksize returned by stat(2), when non-zero */
        unsigned int              ll_stat_blksize;
 
+       /* maximum relative age of cached statfs results */
+       unsigned int              ll_statfs_max_age;
+
        struct kset               ll_kset;      /* sysfs object */
        struct completion         ll_kobj_unregister;
 
index 8015083..0ffbd6e 100644 (file)
@@ -91,6 +91,7 @@ static struct ll_sb_info *ll_init_sbi(void)
        spin_lock_init(&sbi->ll_pp_extent_lock);
        spin_lock_init(&sbi->ll_process_lock);
         sbi->ll_rw_stats_on = 0;
+       sbi->ll_statfs_max_age = OBD_STATFS_CACHE_SECONDS;
 
         si_meminfo(&si);
         pages = si.totalram - si.totalhigh;
@@ -331,7 +332,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         * can make sure the client can be mounted as long as MDT0 is
         * avaible */
        err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
-                       ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
+                       ktime_get_seconds() - sbi->ll_statfs_max_age,
                        OBD_STATFS_FOR_MDT0);
        if (err)
                GOTO(out_md_fid, err);
@@ -1938,7 +1939,7 @@ int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
        int rc;
 
        ENTRY;
-       max_age = ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS;
+       max_age = ktime_get_seconds() - sbi->ll_statfs_max_age;
 
        rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
        if (rc)
index 2376f62..8728cc7 100644 (file)
@@ -875,6 +875,36 @@ static ssize_t lazystatfs_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(lazystatfs);
 
+static ssize_t statfs_max_age_show(struct kobject *kobj, struct attribute *attr,
+                                  char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_statfs_max_age);
+}
+
+static ssize_t statfs_max_age_store(struct kobject *kobj,
+                                   struct attribute *attr, const char *buffer,
+                                   size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned int val;
+       int rc;
+
+       rc = kstrtouint(buffer, 10, &val);
+       if (rc)
+               return rc;
+       if (val > OBD_STATFS_CACHE_MAX_AGE)
+               return -EINVAL;
+
+       sbi->ll_statfs_max_age = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(statfs_max_age);
+
 static ssize_t max_easize_show(struct kobject *kobj,
                               struct attribute *attr,
                               char *buf)
@@ -1477,6 +1507,7 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_statahead_max.attr,
        &lustre_attr_statahead_agl.attr,
        &lustre_attr_lazystatfs.attr,
+       &lustre_attr_statfs_max_age.attr,
        &lustre_attr_max_easize.attr,
        &lustre_attr_default_easize.attr,
        &lustre_attr_xattr_cache.attr,