Whamcloud - gitweb
LU-11721 lod: limit statfs ffree if less than OST ffree 67/34167/8
author Andreas Dilger <adilger@whamcloud.com>
Sun, 3 Feb 2019 00:11:00 +0000 (17:11 -0700)
committer Oleg Drokin <green@whamcloud.com>
Fri, 15 Mar 2019 23:46:21 +0000 (23:46 +0000)
If the OSTs report fewer total free objects than the MDTs, then
use the free files count reported by the OSTs, since it represents
the minimum number of files that can be created in the filesystem
(creating more may be possible, but this depends on other factors).
This has always been what ll_statfs_internal() reports, but the
statfs aggregation via the MDT missed this step in lod_statfs().

Fix a minor defect in sanity test_418() that would let it loop
forever until the test was killed due to timeout if the "df -i"
and "lfs df -i" output did not converge.

Fixes: b500d5193360 ("LU-10018 protocol: MDT as a statfs proxy")
Fixes: 263e80f4572b ("LU-11721 tests: wait for statfs to update ...")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Id8d7b7edfd854f1ec30bfbbb85f04b0c973ebbe5
Reviewed-on: https://review.whamcloud.com/34167
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Nikitas Angelinas <nangelinas@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_class.h
lustre/llite/llite_lib.c
lustre/lmv/lmv_obd.c
lustre/lod/lod_dev.c
lustre/tests/sanity.sh

index f0fcedb..9ad5ec8 100644 (file)
@@ -1020,8 +1020,9 @@ static inline int obd_statfs_async(struct obd_export *exp,
 
        obd = exp->exp_obd;
        if (!obd->obd_type || !obd->obd_type->typ_dt_ops->o_statfs) {
 
        obd = exp->exp_obd;
        if (!obd->obd_type || !obd->obd_type->typ_dt_ops->o_statfs) {
-               CERROR("%s: no %s operation\n", obd->obd_name, __func__);
-               RETURN(-EOPNOTSUPP);
+               rc = -EOPNOTSUPP;
+               CERROR("%s: no statfs operation: rc = %d\n", obd->obd_name, rc);
+               RETURN(rc);
        }
 
        CDEBUG(D_SUPER, "%s: age %lld, max_age %lld\n",
        }
 
        CDEBUG(D_SUPER, "%s: age %lld, max_age %lld\n",
index 9766890..bf17554 100644 (file)
@@ -1797,57 +1797,54 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
 int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
                       u32 flags)
 {
 int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
                       u32 flags)
 {
-       struct obd_statfs obd_osfs;
+       struct obd_statfs obd_osfs = { 0 };
        time64_t max_age;
        int rc;
 
        ENTRY;
        max_age = ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS;
 
        time64_t max_age;
        int rc;
 
        ENTRY;
        max_age = ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS;
 
-        rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
-        if (rc) {
-                CERROR("md_statfs fails: rc = %d\n", rc);
-                RETURN(rc);
-        }
+       rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
+       if (rc)
+               RETURN(rc);
 
        osfs->os_type = LL_SUPER_MAGIC;
 
        CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
 
        osfs->os_type = LL_SUPER_MAGIC;
 
        CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
-               osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
+             osfs->os_bavail, osfs->os_blocks, osfs->os_ffree, osfs->os_files);
 
        if (osfs->os_state & OS_STATE_SUM)
                GOTO(out, rc);
 
 
        if (osfs->os_state & OS_STATE_SUM)
                GOTO(out, rc);
 
-        if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
-                flags |= OBD_STATFS_NODELAY;
+       if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
+               flags |= OBD_STATFS_NODELAY;
 
        rc = obd_statfs(NULL, sbi->ll_dt_exp, &obd_osfs, max_age, flags);
 
        rc = obd_statfs(NULL, sbi->ll_dt_exp, &obd_osfs, max_age, flags);
-        if (rc) {
-                CERROR("obd_statfs fails: rc = %d\n", rc);
-                RETURN(rc);
-        }
+       if (rc) /* Possibly a filesystem with no OSTs.  Report MDT totals. */
+               GOTO(out, rc = 0);
 
        CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
 
        CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
-               obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
-               obd_osfs.os_files);
-
-        osfs->os_bsize = obd_osfs.os_bsize;
-        osfs->os_blocks = obd_osfs.os_blocks;
-        osfs->os_bfree = obd_osfs.os_bfree;
-        osfs->os_bavail = obd_osfs.os_bavail;
-
-        /* If we don't have as many objects free on the OST as inodes
-         * on the MDS, we reduce the total number of inodes to
-         * compensate, so that the "inodes in use" number is correct.
-         */
-        if (obd_osfs.os_ffree < osfs->os_ffree) {
-                osfs->os_files = (osfs->os_files - osfs->os_ffree) +
-                        obd_osfs.os_ffree;
-                osfs->os_ffree = obd_osfs.os_ffree;
-        }
+              obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
+              obd_osfs.os_files);
+
+       osfs->os_bsize = obd_osfs.os_bsize;
+       osfs->os_blocks = obd_osfs.os_blocks;
+       osfs->os_bfree = obd_osfs.os_bfree;
+       osfs->os_bavail = obd_osfs.os_bavail;
+
+       /* If we have _some_ OSTs, but don't have as many free objects on the
+        * OSTs as inodes on the MDTs, reduce the reported number of inodes
+        * to compensate, so that the "inodes in use" number is correct.
+        * This should be kept in sync with lod_statfs() behaviour.
+        */
+       if (obd_osfs.os_files && obd_osfs.os_ffree < osfs->os_ffree) {
+               osfs->os_files = (osfs->os_files - osfs->os_ffree) +
+                                obd_osfs.os_ffree;
+               osfs->os_ffree = obd_osfs.os_ffree;
+       }
 
 out:
 
 out:
-        RETURN(rc);
+       RETURN(rc);
 }
 int ll_statfs(struct dentry *de, struct kstatfs *sfs)
 {
 }
 int ll_statfs(struct dentry *de, struct kstatfs *sfs)
 {
index 19bfed4..cc0fd5d 100644 (file)
@@ -1410,8 +1410,8 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
                rc = obd_statfs(env, lmv->tgts[idx]->ltd_exp, temp,
                                max_age, flags);
                if (rc) {
                rc = obd_statfs(env, lmv->tgts[idx]->ltd_exp, temp,
                                max_age, flags);
                if (rc) {
-                       CERROR("can't stat MDS #%d (%s), error %d\n", i,
-                              lmv->tgts[idx]->ltd_exp->exp_obd->obd_name,
+                       CERROR("%s: can't stat MDS #%d: rc = %d\n",
+                              lmv->tgts[idx]->ltd_exp->exp_obd->obd_name, i,
                               rc);
                        GOTO(out_free_temp, rc);
                }
                               rc);
                        GOTO(out_free_temp, rc);
                }
index 94c373c..96e0e2a 100644 (file)
@@ -1327,8 +1327,9 @@ static int lod_statfs(const struct lu_env *env,
        struct lod_ost_desc *ost;
        struct lod_mdt_desc *mdt;
        struct obd_statfs ost_sfs;
        struct lod_ost_desc *ost;
        struct lod_mdt_desc *mdt;
        struct obd_statfs ost_sfs;
+       u64 ost_files = 0;
+       u64 ost_ffree = 0;
        int i, rc, bs;
        int i, rc, bs;
-       bool mdtonly;
 
        rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
        if (rc)
 
        rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
        if (rc)
@@ -1360,8 +1361,6 @@ static int lod_statfs(const struct lu_env *env,
         * decide how to account MDT space. for simplicity let's
         * just fallback to pre-DoM policy if any OST is alive
         */
         * decide how to account MDT space. for simplicity let's
         * just fallback to pre-DoM policy if any OST is alive
         */
-       mdtonly = true;
-
        lod_getref(&lod->lod_ost_descs);
        lod_foreach_ost(lod, i) {
                ost = OST_TGT(lod, i);
        lod_getref(&lod->lod_ost_descs);
        lod_foreach_ost(lod, i) {
                ost = OST_TGT(lod, i);
@@ -1370,17 +1369,18 @@ static int lod_statfs(const struct lu_env *env,
                /* ignore errors */
                if (rc || ost_sfs.os_bsize == 0)
                        continue;
                /* ignore errors */
                if (rc || ost_sfs.os_bsize == 0)
                        continue;
-               if (mdtonly) {
+               if (!ost_files) {
                        /*
                        /*
-                        * if only MDTs and DoM report MDT space,
-                        * otherwise only OST space
+                        * if only MDTs with DoM then report only MDT blocks,
+                        * otherwise show only OST blocks, and DoM is "free"
                         */
                        sfs->os_bavail = 0;
                        sfs->os_blocks = 0;
                        sfs->os_bfree = 0;
                        sfs->os_granted = 0;
                         */
                        sfs->os_bavail = 0;
                        sfs->os_blocks = 0;
                        sfs->os_bfree = 0;
                        sfs->os_granted = 0;
-                       mdtonly = false;
                }
                }
+               ost_files += sfs->os_files;
+               ost_ffree += sfs->os_ffree;
                ost_sfs.os_bavail += ost_sfs.os_granted;
                lod_statfs_sum(sfs, &ost_sfs, &bs);
                LASSERTF(bs == ost_sfs.os_bsize, "%d != %d\n",
                ost_sfs.os_bavail += ost_sfs.os_granted;
                lod_statfs_sum(sfs, &ost_sfs, &bs);
                LASSERTF(bs == ost_sfs.os_bsize, "%d != %d\n",
@@ -1389,6 +1389,16 @@ static int lod_statfs(const struct lu_env *env,
        lod_putref(lod, &lod->lod_ost_descs);
        sfs->os_state |= OS_STATE_SUM;
 
        lod_putref(lod, &lod->lod_ost_descs);
        sfs->os_state |= OS_STATE_SUM;
 
+       /* If we have _some_ OSTs, but don't have as many free objects on the
+        * OSTs as inodes on the MDTs, reduce the reported number of inodes
+        * to compensate, so that the "inodes in use" number is correct.
+        * This should be kept in sync with ll_statfs_internal().
+        */
+       if (ost_files && ost_ffree < sfs->os_ffree) {
+               sfs->os_files = (sfs->os_files - sfs->os_ffree) + ost_ffree;
+               sfs->os_ffree = ost_ffree;
+       }
+
        /* a single successful statfs should be enough */
        rc = 0;
 
        /* a single successful statfs should be enough */
        rc = 0;
 
index 0b7eea3..9c8714b 100755 (executable)
@@ -19525,40 +19525,40 @@ check_lfs_df() {
        local inodes
        local df_out
        local lfs_df_out
        local inodes
        local df_out
        local lfs_df_out
-       local tries=100
-       local count=0
+       local count
        local passed=false
 
        # blocks or inodes
        [ "$1" == "blocks" ] && inodes= || inodes="-i"
 
        local passed=false
 
        # blocks or inodes
        [ "$1" == "blocks" ] && inodes= || inodes="-i"
 
-       while (( count < tries )); do
+       for count in {1..100}; do
                cancel_lru_locks
                sync; sleep 0.2
 
                # read the lines of interest
                cancel_lru_locks
                sync; sleep 0.2
 
                # read the lines of interest
-               df_out=($(df $inodes $dir | tail -n +2)) ||
+               df_out=($(df -P $inodes $dir | tail -n +2)) ||
                        error "df $inodes $dir | tail -n +2 failed"
                lfs_df_out=($($LFS df $inodes $dir | grep summary:)) ||
                        error "lfs df $inodes $dir | grep summary: failed"
 
                # skip first substrings of each output as they are different
                        error "df $inodes $dir | tail -n +2 failed"
                lfs_df_out=($($LFS df $inodes $dir | grep summary:)) ||
                        error "lfs df $inodes $dir | grep summary: failed"
 
                # skip first substrings of each output as they are different
-               # <NID>:/<fsname for df, filesystem_summary: for lfs df
-               df_out=(${df_out[@]:1})
-               lfs_df_out=(${lfs_df_out[@]:1})
-
+               # "<NID>:/<fsname>" for df, "filesystem_summary:" for lfs df
                # compare the two outputs
                passed=true
                # compare the two outputs
                passed=true
-
-               for i in {0..4}; do
+               for i in {1..5}; do
                        [ "${df_out[i]}" != "${lfs_df_out[i]}" ] && passed=false
                done
                $passed && break
        done
 
                        [ "${df_out[i]}" != "${lfs_df_out[i]}" ] && passed=false
                done
                $passed && break
        done
 
-       $passed || error "df and lfs df $1 output mismatch: "   \
-                        "df ${inodes}: ${df_out[*]}, "         \
-                        "lfs df ${inodes}: ${lfs_df_out[*]}"
+       if ! $passed; then
+               df -P $inodes $dir
+               echo
+               lfs df $inodes $dir
+               error "df and lfs df $1 output mismatch: "      \
+                     "df ${inodes}: ${df_out[*]}, "            \
+                     "lfs df ${inodes}: ${lfs_df_out[*]}"
+       fi
 }
 
 test_418() {
 }
 
 test_418() {