Whamcloud - gitweb
LU-10018 protocol: MDT as a statfs proxy 36/29136/91
author Alex Zhuravlev <alexey.zhuravlev@intel.com>
Thu, 21 Sep 2017 15:24:18 +0000 (18:24 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sat, 1 Sep 2018 03:28:47 +0000 (03:28 +0000)
The MDT can act as a proxy for statfs data. This should
make df faster (RTT vs RTT*(#MDTs+1)) and enable
idling connections, so that clients don't connect to
each OST just to report statfs data. The protocol
has been changed slightly to let the MDT differentiate
between its own and aggregated statfs.

Also, obd_statfs has a new field "os_granted" in which
the OST reports how much space has been granted to the
requesting MDT, so that this space can be added to the
available space.

The client's NID is used to distribute MDS_STATFS requests
among MDTs.

Change-Id: I59e03cb5abf809ae8820f874ec51dd2b74e1806c
Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-on: https://review.whamcloud.com/29136
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
25 files changed:
lustre/include/md_object.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/include/obd_support.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/llite_lib.c
lustre/lmv/lmv_obd.c
lustre/lod/lod_dev.c
lustre/lod/lod_object.c
lustre/mdc/mdc_request.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/ofd/ofd_obd.c
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_precreate.c
lustre/osp/osp_sync.c
lustre/ptlrpc/layout.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/wiretest.c
lustre/target/tgt_handler.c
lustre/tests/replay-single.sh
lustre/tests/sanity.sh
lustre/utils/wiretest.c

index 0b1ae1f..cdfb03b 100644 (file)
@@ -648,6 +648,14 @@ static inline int mdo_unlink(const struct lu_env *env,
        return p->mo_dir_ops->mdo_unlink(env, p, c, lname, ma, no_name);
 }
 
        return p->mo_dir_ops->mdo_unlink(env, p, c, lname, ma, no_name);
 }
 
+static inline int mdo_statfs(const struct lu_env *env,
+                            struct md_device *m,
+                            struct obd_statfs *sfs)
+{
+       LASSERT(m->md_ops->mdo_statfs);
+       return m->md_ops->mdo_statfs(env, m, sfs);
+}
+
 /**
  * Used in MDD/OUT layer for object lock rule
  **/
 /**
  * Used in MDD/OUT layer for object lock rule
  **/
index fa3b407..6984f92 100644 (file)
@@ -446,6 +446,8 @@ struct lmv_obd {
 
        __u32                   tgts_size; /* size of tgts array */
        struct lmv_tgt_desc     **tgts;
 
        __u32                   tgts_size; /* size of tgts array */
        struct lmv_tgt_desc     **tgts;
+       int                     lmv_statfs_start;
+
 
        struct obd_connect_data conn_data;
        struct kobject          *lmv_tgts_kobj;
 
        struct obd_connect_data conn_data;
        struct kobject          *lmv_tgts_kobj;
index 3aaa52e..31b06d9 100644 (file)
@@ -47,6 +47,7 @@
                                          * obd_osfs_age */
 #define OBD_STATFS_FOR_MDT0    0x0004  /* The statfs is only for retrieving
                                         * information from MDT0. */
                                          * obd_osfs_age */
 #define OBD_STATFS_FOR_MDT0    0x0004  /* The statfs is only for retrieving
                                         * information from MDT0. */
+#define OBD_STATFS_SUM         0x0008  /* get aggregated statfs from MDT */
 
 extern rwlock_t obd_dev_lock;
 
 
 extern rwlock_t obd_dev_lock;
 
@@ -1081,7 +1082,10 @@ static inline int obd_statfs(const struct lu_env *env, struct obd_export *exp,
 
        CDEBUG(D_SUPER, "osfs %lld, max_age %lld\n",
                obd->obd_osfs_age, max_age);
 
        CDEBUG(D_SUPER, "osfs %lld, max_age %lld\n",
                obd->obd_osfs_age, max_age);
-       if (obd->obd_osfs_age < max_age) {
+       /* bypass cache if it holds aggregated data that wasn't requested */
+       if (obd->obd_osfs_age < max_age ||
+           ((obd->obd_osfs.os_state & OS_STATE_SUM) &&
+            !(flags & OBD_STATFS_SUM))) {
                 rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
                 if (rc == 0) {
                        spin_lock(&obd->obd_osfs_lock);
                 rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
                 if (rc == 0) {
                        spin_lock(&obd->obd_osfs_lock);
index e4aaaf8..d5f4223 100644 (file)
@@ -179,7 +179,9 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_GET_ROOT_NET       0x11b
 #define OBD_FAIL_MDS_GET_ROOT_PACK      0x11c
 #define OBD_FAIL_MDS_STATFS_PACK         0x11d
 #define OBD_FAIL_MDS_GET_ROOT_NET       0x11b
 #define OBD_FAIL_MDS_GET_ROOT_PACK      0x11c
 #define OBD_FAIL_MDS_STATFS_PACK         0x11d
+#define OBD_FAIL_MDS_STATFS_SUM_PACK     0x11d
 #define OBD_FAIL_MDS_STATFS_NET          0x11e
 #define OBD_FAIL_MDS_STATFS_NET          0x11e
+#define OBD_FAIL_MDS_STATFS_SUM_NET      0x11e
 #define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
 #define OBD_FAIL_MDS_PIN_NET             0x120
 #define OBD_FAIL_MDS_UNPIN_NET           0x121
 #define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
 #define OBD_FAIL_MDS_PIN_NET             0x120
 #define OBD_FAIL_MDS_UNPIN_NET           0x121
index ba34185..44daeef 100644 (file)
@@ -823,6 +823,7 @@ struct ptlrpc_body_v2 {
 #define OBD_CONNECT2_FILE_SECCTX        0x1ULL /* set file security context at create */
 #define OBD_CONNECT2_LOCKAHEAD          0x2ULL /* ladvise lockahead v2 */
 #define OBD_CONNECT2_DIR_MIGRATE        0x4ULL /* migrate striped dir */
 #define OBD_CONNECT2_FILE_SECCTX        0x1ULL /* set file security context at create */
 #define OBD_CONNECT2_LOCKAHEAD          0x2ULL /* ladvise lockahead v2 */
 #define OBD_CONNECT2_DIR_MIGRATE        0x4ULL /* migrate striped dir */
+#define OBD_CONNECT2_SUM_STATFS                0x8ULL /* MDT return aggregated stats */
 #define OBD_CONNECT2_FLR               0x20ULL /* FLR support */
 #define OBD_CONNECT2_WBC_INTENTS       0x40ULL /* create/unlink/... intents for wbc, also operations under client-held parent locks */
 #define OBD_CONNECT2_LOCK_CONVERT      0x80ULL /* IBITS lock convert support */
 #define OBD_CONNECT2_FLR               0x20ULL /* FLR support */
 #define OBD_CONNECT2_WBC_INTENTS       0x40ULL /* create/unlink/... intents for wbc, also operations under client-held parent locks */
 #define OBD_CONNECT2_LOCK_CONVERT      0x80ULL /* IBITS lock convert support */
@@ -878,7 +879,8 @@ struct ptlrpc_body_v2 {
                                OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2)
 
 #define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | OBD_CONNECT2_FLR | \
                                OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2)
 
 #define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | OBD_CONNECT2_FLR | \
-+                               OBD_CONNECT2_LOCK_CONVERT)
+                                OBD_CONNECT2_SUM_STATFS | \
+                               OBD_CONNECT2_LOCK_CONVERT)
 
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
 
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
@@ -1260,7 +1262,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
 #define OBD_MD_FLXATTRLS     (0x0000002000000000ULL) /* xattr list */
 #define OBD_MD_FLXATTRRM     (0x0000004000000000ULL) /* xattr remove */
 #define OBD_MD_FLACL         (0x0000008000000000ULL) /* ACL */
 #define OBD_MD_FLXATTRLS     (0x0000002000000000ULL) /* xattr list */
 #define OBD_MD_FLXATTRRM     (0x0000004000000000ULL) /* xattr remove */
 #define OBD_MD_FLACL         (0x0000008000000000ULL) /* ACL */
-/*     OBD_MD_FLRMTPERM     (0x0000010000000000ULL) remote perm, obsolete */
+#define OBD_MD_FLAGSTATFS    (0x0000010000000000ULL) /* aggregated statfs */
 #define OBD_MD_FLMDSCAPA     (0x0000020000000000ULL) /* MDS capability */
 #define OBD_MD_FLOSSCAPA     (0x0000040000000000ULL) /* OSS capability */
 /*      OBD_MD_FLCKSPLIT     (0x0000080000000000ULL) obsolete 2.3.58*/
 #define OBD_MD_FLMDSCAPA     (0x0000020000000000ULL) /* MDS capability */
 #define OBD_MD_FLOSSCAPA     (0x0000040000000000ULL) /* OSS capability */
 /*      OBD_MD_FLCKSPLIT     (0x0000080000000000ULL) obsolete 2.3.58*/
index 716ee16..ab640fa 100644 (file)
@@ -122,6 +122,7 @@ enum obd_statfs_state {
        OS_STATE_NOPRECREATE    = 0x00000004, /**< no object precreation */
        OS_STATE_ENOSPC         = 0x00000020, /**< not enough free space */
        OS_STATE_ENOINO         = 0x00000040, /**< not enough inodes */
        OS_STATE_NOPRECREATE    = 0x00000004, /**< no object precreation */
        OS_STATE_ENOSPC         = 0x00000020, /**< not enough free space */
        OS_STATE_ENOINO         = 0x00000040, /**< not enough inodes */
+       OS_STATE_SUM            = 0x00000100, /**< aggregated for all targets */
 };
 
 /** filesystem statistics/attributes for target device */
 };
 
 /** filesystem statistics/attributes for target device */
@@ -140,14 +141,14 @@ struct obd_statfs {
        __u32           os_fprecreated; /* objs available now to the caller */
                                        /* used in QoS code to find preferred
                                         * OSTs */
        __u32           os_fprecreated; /* objs available now to the caller */
                                        /* used in QoS code to find preferred
                                         * OSTs */
-       __u32           os_spare2;      /* Unused padding fields.  Remember */
-       __u32           os_spare3;      /* to fix lustre_swab_obd_statfs() */
-       __u32           os_spare4;
-       __u32           os_spare5;
-       __u32           os_spare6;
-       __u32           os_spare7;
-       __u32           os_spare8;
-       __u32           os_spare9;
+       __u32           os_granted;     /* space granted for MDS */
+       __u32           os_spare3;      /* Unused padding fields.  Remember */
+       __u32           os_spare4;      /* to fix lustre_swab_obd_statfs() */
+       __u32           os_spare5;
+       __u32           os_spare6;
+       __u32           os_spare7;
+       __u32           os_spare8;
+       __u32           os_spare9;
 };
 
 /**
 };
 
 /**
index 54a752f..0152df5 100644 (file)
@@ -227,7 +227,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 
        data->ocd_connect_flags2 = OBD_CONNECT2_FLR |
                                   OBD_CONNECT2_LOCK_CONVERT |
 
        data->ocd_connect_flags2 = OBD_CONNECT2_FLR |
                                   OBD_CONNECT2_LOCK_CONVERT |
-                                  OBD_CONNECT2_DIR_MIGRATE;
+                                  OBD_CONNECT2_DIR_MIGRATE |
+                                  OBD_CONNECT2_SUM_STATFS;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
@@ -1813,6 +1814,9 @@ int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
        CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
 
        CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
 
+       if (osfs->os_state & OS_STATE_SUM)
+               GOTO(out, rc);
+
         if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
                 flags |= OBD_STATFS_NODELAY;
 
         if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
                 flags |= OBD_STATFS_NODELAY;
 
@@ -1841,6 +1845,7 @@ int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
                 osfs->os_ffree = obd_osfs.os_ffree;
         }
 
                 osfs->os_ffree = obd_osfs.os_ffree;
         }
 
+out:
         RETURN(rc);
 }
 int ll_statfs(struct dentry *de, struct kstatfs *sfs)
         RETURN(rc);
 }
 int ll_statfs(struct dentry *de, struct kstatfs *sfs)
@@ -1854,7 +1859,7 @@ int ll_statfs(struct dentry *de, struct kstatfs *sfs)
         ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
 
        /* Some amount of caching on the client is allowed */
         ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
 
        /* Some amount of caching on the client is allowed */
-       rc = ll_statfs_internal(ll_s2sbi(sb), &osfs, 0);
+       rc = ll_statfs_internal(ll_s2sbi(sb), &osfs, OBD_STATFS_SUM);
        if (rc)
                return rc;
 
        if (rc)
                return rc;
 
index 19742bb..5ab1cb4 100644 (file)
@@ -1335,6 +1335,32 @@ out:
        RETURN(rc);
 }
 
        RETURN(rc);
 }
 
+static int lmv_select_statfs_mdt(struct lmv_obd *lmv, __u32 flags)
+{
+       int i;
+
+       if (flags & OBD_STATFS_FOR_MDT0)
+               return 0;
+
+       if (lmv->lmv_statfs_start || lmv->desc.ld_tgt_count == 1)
+               return lmv->lmv_statfs_start;
+
+       /* choose initial MDT for this client */
+       for (i = 0;; i++) {
+               struct lnet_process_id lnet_id;
+               if (LNetGetId(i, &lnet_id) == -ENOENT)
+                       break;
+
+               if (LNET_NETTYP(LNET_NIDNET(lnet_id.nid)) != LOLND) {
+                       lmv->lmv_statfs_start =
+                               lnet_id.nid % lmv->desc.ld_tgt_count;
+                       break;
+               }
+       }
+
+       return lmv->lmv_statfs_start;
+}
+
 static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
                      struct obd_statfs *osfs, time64_t max_age, __u32 flags)
 {
 static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
                      struct obd_statfs *osfs, time64_t max_age, __u32 flags)
 {
@@ -1342,42 +1368,52 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
        struct lmv_obd          *lmv = &obd->u.lmv;
        struct obd_statfs       *temp;
        int                      rc = 0;
        struct lmv_obd          *lmv = &obd->u.lmv;
        struct obd_statfs       *temp;
        int                      rc = 0;
-       __u32                    i;
+       __u32                    i, idx;
        ENTRY;
 
         OBD_ALLOC(temp, sizeof(*temp));
         if (temp == NULL)
                 RETURN(-ENOMEM);
 
        ENTRY;
 
         OBD_ALLOC(temp, sizeof(*temp));
         if (temp == NULL)
                 RETURN(-ENOMEM);
 
-        for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-               if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL)
+       /* distribute statfs among MDTs */
+       idx = lmv_select_statfs_mdt(lmv, flags);
+
+       for (i = 0; i < lmv->desc.ld_tgt_count; i++, idx++) {
+               idx = idx % lmv->desc.ld_tgt_count;
+               if (lmv->tgts[idx] == NULL || lmv->tgts[idx]->ltd_exp == NULL)
                        continue;
 
                        continue;
 
-               rc = obd_statfs(env, lmv->tgts[i]->ltd_exp, temp,
+               rc = obd_statfs(env, lmv->tgts[idx]->ltd_exp, temp,
                                max_age, flags);
                if (rc) {
                        CERROR("can't stat MDS #%d (%s), error %d\n", i,
                                max_age, flags);
                if (rc) {
                        CERROR("can't stat MDS #%d (%s), error %d\n", i,
-                              lmv->tgts[i]->ltd_exp->exp_obd->obd_name,
+                              lmv->tgts[idx]->ltd_exp->exp_obd->obd_name,
                               rc);
                        GOTO(out_free_temp, rc);
                }
 
                               rc);
                        GOTO(out_free_temp, rc);
                }
 
+               if (temp->os_state & OS_STATE_SUM ||
+                   flags == OBD_STATFS_FOR_MDT0) {
+                       /* reset to the last aggregated values
+                        * and don't sum with non-aggregated data */
+                       /* If the statfs is from mount, it needs to retrieve
+                        * necessary information from MDT0. i.e. mount does
+                        * not need the merged osfs from all of MDT. Also
+                        * clients can be mounted as long as MDT0 is in
+                        * service */
+                       *osfs = *temp;
+                       break;
+               }
+
                if (i == 0) {
                        *osfs = *temp;
                if (i == 0) {
                        *osfs = *temp;
-                       /* If the statfs is from mount, it will needs
-                        * retrieve necessary information from MDT0.
-                        * i.e. mount does not need the merged osfs
-                        * from all of MDT.
-                        * And also clients can be mounted as long as
-                        * MDT0 is in service*/
-                       if (flags & OBD_STATFS_FOR_MDT0)
-                               GOTO(out_free_temp, rc);
-                } else {
-                        osfs->os_bavail += temp->os_bavail;
-                        osfs->os_blocks += temp->os_blocks;
-                        osfs->os_ffree += temp->os_ffree;
-                        osfs->os_files += temp->os_files;
-                }
+               } else {
+                       osfs->os_bavail += temp->os_bavail;
+                       osfs->os_blocks += temp->os_blocks;
+                       osfs->os_ffree += temp->os_ffree;
+                       osfs->os_files += temp->os_files;
+                       osfs->os_granted += temp->os_granted;
+               }
         }
 
         EXIT;
         }
 
         EXIT;
index 8d94449..6949283 100644 (file)
@@ -1263,6 +1263,30 @@ static int lod_root_get(const struct lu_env *env,
        return dt_root_get(env, dt2lod_dev(dev)->lod_child, f);
 }
 
        return dt_root_get(env, dt2lod_dev(dev)->lod_child, f);
 }
 
+static void lod_statfs_sum(struct obd_statfs *sfs,
+                            struct obd_statfs *ost_sfs, int *bs)
+{
+       while (ost_sfs->os_bsize < *bs) {
+               *bs >>= 1;
+               sfs->os_bsize >>= 1;
+               sfs->os_bavail <<= 1;
+               sfs->os_blocks <<= 1;
+               sfs->os_bfree <<= 1;
+               sfs->os_granted <<= 1;
+       }
+       while (ost_sfs->os_bsize > *bs) {
+               ost_sfs->os_bsize >>= 1;
+               ost_sfs->os_bavail <<= 1;
+               ost_sfs->os_blocks <<= 1;
+               ost_sfs->os_bfree <<= 1;
+               ost_sfs->os_granted <<= 1;
+       }
+       sfs->os_bavail += ost_sfs->os_bavail;
+       sfs->os_blocks += ost_sfs->os_blocks;
+       sfs->os_bfree += ost_sfs->os_bfree;
+       sfs->os_granted += ost_sfs->os_granted;
+}
+
 /**
  * Implementation of dt_device_operations::dt_statfs() for LOD
  *
 /**
  * Implementation of dt_device_operations::dt_statfs() for LOD
  *
@@ -1271,7 +1295,73 @@ static int lod_root_get(const struct lu_env *env,
 static int lod_statfs(const struct lu_env *env,
                      struct dt_device *dev, struct obd_statfs *sfs)
 {
 static int lod_statfs(const struct lu_env *env,
                      struct dt_device *dev, struct obd_statfs *sfs)
 {
-       return dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
+       struct lod_device   *lod = dt2lod_dev(dev);
+       struct lod_ost_desc *ost;
+       struct lod_mdt_desc *mdt;
+       struct obd_statfs    ost_sfs;
+       int i, rc, bs;
+       bool mdtonly;
+
+       rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
+       if (rc)
+               GOTO(out, rc);
+
+       bs = sfs->os_bsize;
+
+       sfs->os_bavail = 0;
+       sfs->os_blocks = 0;
+       sfs->os_bfree = 0;
+       sfs->os_granted = 0;
+
+       lod_getref(&lod->lod_mdt_descs);
+       lod_foreach_mdt(lod, i) {
+               mdt = MDT_TGT(lod, i);
+               LASSERT(mdt && mdt->ltd_mdt);
+               rc = dt_statfs(env, mdt->ltd_mdt, &ost_sfs);
+               /* ignore errors */
+               if (rc)
+                       continue;
+               sfs->os_files += ost_sfs.os_files;
+               sfs->os_ffree += ost_sfs.os_ffree;
+               lod_statfs_sum(sfs, &ost_sfs, &bs);
+       }
+       lod_putref(lod, &lod->lod_mdt_descs);
+
+       /* at some point we can check whether DoM is enabled and
+        * decide how to account MDT space. for simplicity let's
+        * just fallback to pre-DoM policy if any OST is alive */
+       mdtonly = true;
+
+       lod_getref(&lod->lod_ost_descs);
+       lod_foreach_ost(lod, i) {
+               ost = OST_TGT(lod, i);
+               LASSERT(ost && ost->ltd_ost);
+               rc = dt_statfs(env, ost->ltd_ost, &ost_sfs);
+               /* ignore errors */
+               if (rc || ost_sfs.os_bsize == 0)
+                       continue;
+               if (mdtonly) {
+                       /* first live OST: drop the MDT space summed above
+                        * so that only OST space is reported */
+                       sfs->os_bavail = 0;
+                       sfs->os_blocks = 0;
+                       sfs->os_bfree = 0;
+                       sfs->os_granted = 0;
+                       mdtonly = false;
+               }
+               ost_sfs.os_bavail += ost_sfs.os_granted;
+               lod_statfs_sum(sfs, &ost_sfs, &bs);
+               LASSERTF(bs == ost_sfs.os_bsize, "%d != %d\n",
+                       (int)sfs->os_bsize, (int)ost_sfs.os_bsize);
+       }
+       lod_putref(lod, &lod->lod_ost_descs);
+       sfs->os_state |= OS_STATE_SUM;
+
+       /* a single successful statfs should be enough */
+       rc = 0;
+
+out:
+       RETURN(rc);
 }
 
 /**
 }
 
 /**
index 4c367bb..9299193 100644 (file)
@@ -1877,6 +1877,7 @@ static int lod_prep_md_striped_create(const struct lu_env *env,
                                      struct dt_object_format *dof,
                                      struct thandle *th)
 {
                                      struct dt_object_format *dof,
                                      struct thandle *th)
 {
+       struct lod_thread_info  *info = lod_env_info(env);
        struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
        struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
        struct lod_object       *lo = lod_dt_obj(dt);
        struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
        struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
        struct lod_object       *lo = lod_dt_obj(dt);
@@ -1969,7 +1970,7 @@ static int lod_prep_md_striped_create(const struct lu_env *env,
                                continue;
 
                        tgt_dt = tgt->ltd_tgt;
                                continue;
 
                        tgt_dt = tgt->ltd_tgt;
-                       rc = dt_statfs(env, tgt_dt, NULL);
+                       rc = dt_statfs(env, tgt_dt, &info->lti_osfs);
                        if (rc) {
                                /* this OSP doesn't feel well */
                                rc = 0;
                        if (rc) {
                                /* this OSP doesn't feel well */
                                rc = 0;
index 4e0b35b..2ef7061 100644 (file)
@@ -1488,7 +1488,7 @@ static int mdc_statfs(const struct lu_env *env,
         struct ptlrpc_request *req;
         struct obd_statfs     *msfs;
         struct obd_import     *imp = NULL;
         struct ptlrpc_request *req;
         struct obd_statfs     *msfs;
         struct obd_import     *imp = NULL;
-        int                    rc;
+       int                    rc;
         ENTRY;
 
         /*
         ENTRY;
 
         /*
@@ -1502,10 +1502,21 @@ static int mdc_statfs(const struct lu_env *env,
         if (!imp)
                 RETURN(-ENODEV);
 
         if (!imp)
                 RETURN(-ENODEV);
 
-        req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS,
-                                        LUSTRE_MDS_VERSION, MDS_STATFS);
-        if (req == NULL)
-                GOTO(output, rc = -ENOMEM);
+       req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS,
+                                       LUSTRE_MDS_VERSION, MDS_STATFS);
+       if (req == NULL)
+               GOTO(output, rc = -ENOMEM);
+
+       if ((flags & OBD_STATFS_SUM) &&
+           (exp_connect_flags2(exp) & OBD_CONNECT2_SUM_STATFS)) {
+               /* request aggregated statfs */
+               struct mdt_body *body;
+
+               body = req_capsule_client_get(&req->rq_pill, &RMF_MDT_BODY);
+               if (body == NULL)
+                       GOTO(out, rc = -EPROTO);
+               body->mbo_valid = OBD_MD_FLAGSTATFS;
+       }
 
         ptlrpc_request_set_replen(req);
 
 
         ptlrpc_request_set_replen(req);
 
index 6550570..540f093 100644 (file)
@@ -412,13 +412,16 @@ out:
 
 static int mdt_statfs(struct tgt_session_info *tsi)
 {
 
 static int mdt_statfs(struct tgt_session_info *tsi)
 {
-       struct ptlrpc_request           *req = tgt_ses_req(tsi);
-       struct mdt_thread_info          *info = tsi2mdt_info(tsi);
-       struct mdt_device               *mdt = info->mti_mdt;
-       struct tg_grants_data           *tgd = &mdt->mdt_lut.lut_tgd;
-       struct ptlrpc_service_part      *svcpt;
-       struct obd_statfs               *osfs;
-       int                             rc;
+       struct ptlrpc_request *req = tgt_ses_req(tsi);
+       struct mdt_thread_info *info = tsi2mdt_info(tsi);
+       struct mdt_device *mdt = info->mti_mdt;
+       struct tg_grants_data *tgd = &mdt->mdt_lut.lut_tgd;
+       struct md_device *next = mdt->mdt_child;
+       struct ptlrpc_service_part *svcpt;
+       struct obd_statfs *osfs;
+       struct mdt_body *reqbody = NULL;
+       struct mdt_statfs_cache *msf;
+       int rc;
 
        ENTRY;
 
 
        ENTRY;
 
@@ -440,11 +443,34 @@ static int mdt_statfs(struct tgt_session_info *tsi)
        if (!osfs)
                GOTO(out, rc = -EPROTO);
 
        if (!osfs)
                GOTO(out, rc = -EPROTO);
 
-       rc = tgt_statfs_internal(tsi->tsi_env, &mdt->mdt_lut, osfs,
-                                ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
-                                NULL);
-       if (unlikely(rc))
-               GOTO(out, rc);
+       if (mdt_is_sum_statfs_client(req->rq_export))
+               reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
+
+       if (reqbody && reqbody->mbo_valid & OBD_MD_FLAGSTATFS)
+               msf = &mdt->mdt_sum_osfs;
+       else
+               msf = &mdt->mdt_osfs;
+
+       if (msf->msf_age + OBD_STATFS_CACHE_SECONDS <= ktime_get_seconds()) {
+                       /** statfs data is too old, get up-to-date one */
+                       if (reqbody && reqbody->mbo_valid & OBD_MD_FLAGSTATFS)
+                               rc = next->md_ops->mdo_statfs(info->mti_env,
+                                                             next, osfs);
+                       else
+                               rc = dt_statfs(info->mti_env, mdt->mdt_bottom,
+                                              osfs);
+                       if (rc)
+                               GOTO(out, rc);
+                       spin_lock(&mdt->mdt_lock);
+                       msf->msf_osfs = *osfs;
+                       msf->msf_age = ktime_get_seconds();
+                       spin_unlock(&mdt->mdt_lock);
+       } else {
+                       /** use cached statfs data */
+                       spin_lock(&mdt->mdt_lock);
+                       *osfs = msf->msf_osfs;
+                       spin_unlock(&mdt->mdt_lock);
+       }
 
        /* at least try to account for cached pages.  its still racy and
         * might be under-reporting if clients haven't announced their
 
        /* at least try to account for cached pages.  its still racy and
         * might be under-reporting if clients haven't announced their
index 71cf493..8917002 100644 (file)
@@ -196,6 +196,11 @@ enum {
        NUM_DOM_LOCK_ON_OPEN_MODES
 };
 
        NUM_DOM_LOCK_ON_OPEN_MODES
 };
 
+struct mdt_statfs_cache {
+       struct obd_statfs msf_osfs;
+       __u64 msf_age;
+};
+
 struct mdt_device {
        /* super-class */
        struct lu_device           mdt_lu_dev;
 struct mdt_device {
        /* super-class */
        struct lu_device           mdt_lu_dev;
@@ -250,6 +255,10 @@ struct mdt_device {
        /* lock for osfs and md_root */
        spinlock_t                 mdt_lock;
 
        /* lock for osfs and md_root */
        spinlock_t                 mdt_lock;
 
+       /* statfs optimization: we cache a bit  */
+       struct mdt_statfs_cache    mdt_sum_osfs;
+       struct mdt_statfs_cache    mdt_osfs;
+
         /* root squash */
        struct root_squash_info    mdt_squash;
 
         /* root squash */
        struct root_squash_info    mdt_squash;
 
@@ -689,6 +698,12 @@ static inline bool mdt_lmm_is_flr(struct lov_mds_md *lmm)
               le16_to_cpu(lcm->lcm_mirror_count) > 0;
 }
 
               le16_to_cpu(lcm->lcm_mirror_count) > 0;
 }
 
+static inline bool mdt_is_sum_statfs_client(struct obd_export *exp)
+{
+       return exp_connect_flags(exp) & OBD_CONNECT_FLAGS2 &&
+              exp_connect_flags2(exp) & OBD_CONNECT2_SUM_STATFS;
+}
+
 __u64 mdt_get_disposition(struct ldlm_reply *rep, __u64 op_flag);
 void mdt_set_disposition(struct mdt_thread_info *info,
                         struct ldlm_reply *rep, __u64 op_flag);
 __u64 mdt_get_disposition(struct ldlm_reply *rep, __u64 op_flag);
 void mdt_set_disposition(struct mdt_thread_info *info,
                         struct ldlm_reply *rep, __u64 op_flag);
index bbe2686..e83f4ce 100644 (file)
@@ -827,8 +827,9 @@ int ofd_statfs(const struct lu_env *env,  struct obd_export *exp,
                struct tg_export_data *ted;
 
                ted = &obd->obd_self_export->exp_target_data;
                struct tg_export_data *ted;
 
                ted = &obd->obd_self_export->exp_target_data;
-               osfs->os_bavail -= min_t(u64, osfs->os_bavail,
-                                        ted->ted_grant >> tgd->tgd_blockbits);
+               osfs->os_granted = min_t(u64, osfs->os_bavail,
+                                         ted->ted_grant >> tgd->tgd_blockbits);
+               osfs->os_bavail -= osfs->os_granted;
        }
 
        tgt_grant_sanity_check(obd, __func__);
        }
 
        tgt_grant_sanity_check(obd, __func__);
@@ -858,6 +859,7 @@ int ofd_statfs(const struct lu_env *env,  struct obd_export *exp,
                osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
                osfs->os_bfree  <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
                osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
                osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
                osfs->os_bfree  <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
                osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+               osfs->os_granted <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
                osfs->os_bsize    = 1 << COMPAT_BSIZE_SHIFT;
        }
 
                osfs->os_bsize    = 1 << COMPAT_BSIZE_SHIFT;
        }
 
index 7c63087..f471022 100644 (file)
@@ -603,6 +603,8 @@ static int osp_shutdown(const struct lu_env *env, struct osp_device *d)
 
        rc = osp_disconnect(d);
 
 
        rc = osp_disconnect(d);
 
+       osp_statfs_fini(d);
+
        if (!d->opd_connect_mdt) {
                /* stop sync thread */
                osp_sync_fini(d);
        if (!d->opd_connect_mdt) {
                /* stop sync thread */
                osp_sync_fini(d);
@@ -743,12 +745,12 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
        if (unlikely(d->opd_imp_active == 0))
                RETURN(-ENOTCONN);
 
        if (unlikely(d->opd_imp_active == 0))
                RETURN(-ENOTCONN);
 
-       if (d->opd_pre == NULL)
-               RETURN(0);
-
        /* return recently updated data */
        *sfs = d->opd_statfs;
 
        /* return recently updated data */
        *sfs = d->opd_statfs;
 
+       if (d->opd_pre == NULL)
+               RETURN(0);
+
        /*
         * layer above osp (usually lod) can use ffree to estimate
         * how many objects are available for immediate creation
        /*
         * layer above osp (usually lod) can use ffree to estimate
         * how many objects are available for immediate creation
@@ -1194,11 +1196,16 @@ static int osp_init0(const struct lu_env *env, struct osp_device *osp,
                if (rc < 0)
                        GOTO(out_precreat, rc);
        } else {
                if (rc < 0)
                        GOTO(out_precreat, rc);
        } else {
+               osp->opd_got_disconnected = 1;
                rc = osp_update_init(osp);
                if (rc != 0)
                        GOTO(out_fid, rc);
        }
 
                rc = osp_update_init(osp);
                if (rc != 0)
                        GOTO(out_fid, rc);
        }
 
+       rc = osp_init_statfs(osp);
+       if (rc)
+               GOTO(out_precreat, rc);
+
        ns_register_cancel(obd->obd_namespace, osp_cancel_weight);
 
        /*
        ns_register_cancel(obd->obd_namespace, osp_cancel_weight);
 
        /*
@@ -1622,12 +1629,10 @@ static int osp_import_event(struct obd_device *obd, struct obd_import *imp,
                d->opd_imp_connected = 1;
                d->opd_imp_seen_connected = 1;
                d->opd_obd->obd_inactive = 0;
                d->opd_imp_connected = 1;
                d->opd_imp_seen_connected = 1;
                d->opd_obd->obd_inactive = 0;
+               wake_up(&d->opd_pre_waitq);
                if (d->opd_connect_mdt)
                        break;
 
                if (d->opd_connect_mdt)
                        break;
 
-               if (d->opd_pre != NULL)
-                       wake_up(&d->opd_pre_waitq);
-
                osp_sync_check_for_work(d);
                CDEBUG(D_HA, "got connected\n");
                break;
                osp_sync_check_for_work(d);
                CDEBUG(D_HA, "got connected\n");
                break;
index 80cdad5..711a787 100644 (file)
@@ -68,7 +68,6 @@ struct osp_precreate {
        /*
         * Precreation pool
         */
        /*
         * Precreation pool
         */
-       spinlock_t                       osp_pre_lock;
 
        /* last fid to assign in creation */
        struct lu_fid                    osp_pre_used_fid;
 
        /* last fid to assign in creation */
        struct lu_fid                    osp_pre_used_fid;
@@ -188,6 +187,7 @@ struct osp_device {
        struct osp_precreate            *opd_pre;
        /* dedicate precreate thread */
        struct ptlrpc_thread             opd_pre_thread;
        struct osp_precreate            *opd_pre;
        /* dedicate precreate thread */
        struct ptlrpc_thread             opd_pre_thread;
+       spinlock_t                       opd_pre_lock;
        /* thread waits for signals about pool going empty */
        wait_queue_head_t                opd_pre_waitq;
 
        /* thread waits for signals about pool going empty */
        wait_queue_head_t                opd_pre_waitq;
 
@@ -268,7 +268,6 @@ struct osp_device {
        int                             opd_reserved_mb_low;
 };
 
        int                             opd_reserved_mb_low;
 };
 
-#define opd_pre_lock                   opd_pre->osp_pre_lock
 #define opd_pre_used_fid               opd_pre->osp_pre_used_fid
 #define opd_pre_last_created_fid       opd_pre->osp_pre_last_created_fid
 #define opd_pre_reserved               opd_pre->osp_pre_reserved
 #define opd_pre_used_fid               opd_pre->osp_pre_used_fid
 #define opd_pre_last_created_fid       opd_pre->osp_pre_last_created_fid
 #define opd_pre_reserved               opd_pre->osp_pre_reserved
@@ -799,6 +798,9 @@ int osp_reset_last_used(const struct lu_env *env, struct osp_device *osp);
 int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
                                 struct lu_fid *fid, int sync);
 int osp_init_pre_fid(struct osp_device *osp);
 int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
                                 struct lu_fid *fid, int sync);
 int osp_init_pre_fid(struct osp_device *osp);
+int osp_init_statfs(struct osp_device *osp);
+void osp_fini_statfs(struct osp_device *osp);
+void osp_statfs_fini(struct osp_device *d);
 
 /* lproc_osp.c */
 void osp_tunables_init(struct osp_device *osp);
 
 /* lproc_osp.c */
 void osp_tunables_init(struct osp_device *osp);
index ca07441..125cc82 100644 (file)
@@ -93,7 +93,7 @@ static void osp_statfs_timer_cb(cfs_timer_cb_arg_t data)
        struct osp_device *d = cfs_from_timer(d, data, opd_statfs_timer);
 
        LASSERT(d);
        struct osp_device *d = cfs_from_timer(d, data, opd_statfs_timer);
 
        LASSERT(d);
-       if (d->opd_pre != NULL && osp_precreate_running(d))
+       if (osp_precreate_running(d))
                wake_up(&d->opd_pre_waitq);
 }
 
                wake_up(&d->opd_pre_waitq);
 }
 
@@ -137,7 +137,8 @@ static int osp_statfs_interpret(const struct lu_env *env,
 
        d->opd_statfs = *msfs;
 
 
        d->opd_statfs = *msfs;
 
-       osp_pre_update_status(d, rc);
+       if (d->opd_pre)
+               osp_pre_update_status(d, rc);
 
        /* schedule next update */
        maxage_ns = d->opd_statfs_maxage * NSEC_PER_SEC;
 
        /* schedule next update */
        maxage_ns = d->opd_statfs_maxage * NSEC_PER_SEC;
@@ -187,17 +188,21 @@ static int osp_statfs_update(const struct lu_env *env, struct osp_device *d)
        imp = d->opd_obd->u.cli.cl_import;
        LASSERT(imp);
 
        imp = d->opd_obd->u.cli.cl_import;
        LASSERT(imp);
 
-       req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
+       req = ptlrpc_request_alloc(imp,
+                          d->opd_pre ? &RQF_OST_STATFS : &RQF_MDS_STATFS);
        if (req == NULL)
                RETURN(-ENOMEM);
 
        if (req == NULL)
                RETURN(-ENOMEM);
 
-       rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
+       rc = ptlrpc_request_pack(req,
+                        d->opd_pre ? LUSTRE_OST_VERSION : LUSTRE_MDS_VERSION,
+                        d->opd_pre ? OST_STATFS : MDS_STATFS);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }
        ptlrpc_request_set_replen(req);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }
        ptlrpc_request_set_replen(req);
-       req->rq_request_portal = OST_CREATE_PORTAL;
+       if (d->opd_pre)
+               req->rq_request_portal = OST_CREATE_PORTAL;
        ptlrpc_at_set_req_timeout(req);
 
        req->rq_interpret_reply = (ptlrpc_interpterer_t)osp_statfs_interpret;
        ptlrpc_at_set_req_timeout(req);
 
        req->rq_interpret_reply = (ptlrpc_interpterer_t)osp_statfs_interpret;
@@ -324,6 +329,9 @@ static inline int osp_precreate_near_empty(const struct lu_env *env,
 {
        int rc;
 
 {
        int rc;
 
+       if (d->opd_pre == NULL)
+               return 0;
+
        /* XXX: do we really need locking here? */
        spin_lock(&d->opd_pre_lock);
        rc = osp_precreate_near_empty_nolock(env, d);
        /* XXX: do we really need locking here? */
        spin_lock(&d->opd_pre_lock);
        rc = osp_precreate_near_empty_nolock(env, d);
@@ -1193,7 +1201,7 @@ static int osp_precreate_thread(void *_arg)
                 * need to be connected to OST
                 */
                while (osp_precreate_running(d)) {
                 * need to be connected to OST
                 */
                while (osp_precreate_running(d)) {
-                       if (d->opd_pre_recovering &&
+                       if ((d->opd_pre == NULL || d->opd_pre_recovering) &&
                            d->opd_imp_connected &&
                            !d->opd_got_disconnected)
                                break;
                            d->opd_imp_connected &&
                            !d->opd_got_disconnected)
                                break;
@@ -1213,19 +1221,21 @@ static int osp_precreate_thread(void *_arg)
                if (!osp_precreate_running(d))
                        break;
 
                if (!osp_precreate_running(d))
                        break;
 
-               LASSERT(d->opd_obd->u.cli.cl_seq != NULL);
-               /* Sigh, fid client is not ready yet */
-               if (d->opd_obd->u.cli.cl_seq->lcs_exp == NULL)
-                       continue;
+               if (d->opd_pre) {
+                       LASSERT(d->opd_obd->u.cli.cl_seq != NULL);
+                       /* Sigh, fid client is not ready yet */
+                       if (d->opd_obd->u.cli.cl_seq->lcs_exp == NULL)
+                               continue;
 
 
-               /* Init fid for osp_precreate if necessary */
-               rc = osp_init_pre_fid(d);
-               if (rc != 0) {
-                       class_export_put(d->opd_exp);
-                       d->opd_obd->u.cli.cl_seq->lcs_exp = NULL;
-                       CERROR("%s: init pre fid error: rc = %d\n",
-                              d->opd_obd->obd_name, rc);
-                       continue;
+                       /* Init fid for osp_precreate if necessary */
+                       rc = osp_init_pre_fid(d);
+                       if (rc != 0) {
+                               class_export_put(d->opd_exp);
+                               d->opd_obd->u.cli.cl_seq->lcs_exp = NULL;
+                               CERROR("%s: init pre fid error: rc = %d\n",
+                                               d->opd_obd->obd_name, rc);
+                               continue;
+                       }
                }
 
                if (osp_statfs_update(&env, d)) {
                }
 
                if (osp_statfs_update(&env, d)) {
@@ -1234,14 +1244,18 @@ static int osp_precreate_thread(void *_arg)
                        continue;
                }
 
                        continue;
                }
 
-               /*
-                * Clean up orphans or recreate missing objects.
-                */
-               rc = osp_precreate_cleanup_orphans(&env, d);
-               if (rc != 0) {
-                       schedule_timeout_interruptible(cfs_time_seconds(1));
-                       continue;
+               if (d->opd_pre) {
+                       /*
+                        * Clean up orphans or recreate missing objects.
+                        */
+                       rc = osp_precreate_cleanup_orphans(&env, d);
+                       if (rc != 0) {
+                               schedule_timeout_interruptible(
+                                       msecs_to_jiffies(MSEC_PER_SEC));
+                               continue;
+                       }
                }
                }
+
                /*
                 * connected, can handle precreates now
                 */
                /*
                 * connected, can handle precreates now
                 */
@@ -1264,6 +1278,9 @@ static int osp_precreate_thread(void *_arg)
                                if (osp_statfs_update(&env, d))
                                        break;
 
                                if (osp_statfs_update(&env, d))
                                        break;
 
+                       if (d->opd_pre == NULL)
+                               continue;
+
                        /* To avoid handling different seq in precreate/orphan
                         * cleanup, it will hold precreate until current seq is
                         * used up. */
                        /* To avoid handling different seq in precreate/orphan
                         * cleanup, it will hold precreate until current seq is
                         * used up. */
@@ -1675,9 +1692,6 @@ out:
  */
 int osp_init_precreate(struct osp_device *d)
 {
  */
 int osp_init_precreate(struct osp_device *d)
 {
-       struct l_wait_info       lwi = { 0 };
-       struct task_struct              *task;
-
        ENTRY;
 
        OBD_ALLOC_PTR(d->opd_pre);
        ENTRY;
 
        OBD_ALLOC_PTR(d->opd_pre);
@@ -1685,6 +1699,7 @@ int osp_init_precreate(struct osp_device *d)
                RETURN(-ENOMEM);
 
        /* initially precreation isn't ready */
                RETURN(-ENOMEM);
 
        /* initially precreation isn't ready */
+       init_waitqueue_head(&d->opd_pre_user_waitq);
        d->opd_pre_status = -EAGAIN;
        fid_zero(&d->opd_pre_used_fid);
        d->opd_pre_used_fid.f_oid = 1;
        d->opd_pre_status = -EAGAIN;
        fid_zero(&d->opd_pre_used_fid);
        d->opd_pre_used_fid.f_oid = 1;
@@ -1699,9 +1714,40 @@ int osp_init_precreate(struct osp_device *d)
        d->opd_reserved_mb_high = 0;
        d->opd_reserved_mb_low = 0;
 
        d->opd_reserved_mb_high = 0;
        d->opd_reserved_mb_low = 0;
 
+       RETURN(0);
+}
+
+/**
+ * Finish precreate functionality of OSP
+ *
+ * Frees the precreate state (d->opd_pre) and resets the pointer to
+ * NULL; does nothing if it was never allocated. Stopping the thread
+ * and the update timer is done separately by osp_statfs_fini().
+ *
+ * \param[in] d                OSP device
+ */
+void osp_precreate_fini(struct osp_device *d)
+{
+       ENTRY;
+
+       if (d->opd_pre == NULL)
+               RETURN_EXIT;
+
+       OBD_FREE_PTR(d->opd_pre);
+       d->opd_pre = NULL;
+
+       EXIT;
+}
+
+int osp_init_statfs(struct osp_device *d)
+{
+       struct l_wait_info       lwi = { 0 };
+       struct task_struct              *task;
+
+       ENTRY;
+
        spin_lock_init(&d->opd_pre_lock);
        init_waitqueue_head(&d->opd_pre_waitq);
        spin_lock_init(&d->opd_pre_lock);
        init_waitqueue_head(&d->opd_pre_waitq);
-       init_waitqueue_head(&d->opd_pre_user_waitq);
        thread_set_flags(&d->opd_pre_thread, SVC_INIT);
        init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);
 
        thread_set_flags(&d->opd_pre_thread, SVC_INIT);
        init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);
 
@@ -1737,34 +1783,18 @@ int osp_init_precreate(struct osp_device *d)
        RETURN(0);
 }
 
        RETURN(0);
 }
 
-/**
- * Finish precreate functionality of OSP
- *
- *
- * Asks all the activity (the thread, update timer) to stop, then
- * wait till that is done.
- *
- * \param[in] d                OSP device
- */
-void osp_precreate_fini(struct osp_device *d)
+void osp_statfs_fini(struct osp_device *d)
 {
        struct ptlrpc_thread *thread = &d->opd_pre_thread;
        ENTRY;
 
        del_timer(&d->opd_statfs_timer);
 
 {
        struct ptlrpc_thread *thread = &d->opd_pre_thread;
        ENTRY;
 
        del_timer(&d->opd_statfs_timer);
 
-       if (d->opd_pre == NULL)
-               RETURN_EXIT;
-
        if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
                thread->t_flags = SVC_STOPPING;
                wake_up(&d->opd_pre_waitq);
                wait_event(thread->t_ctl_waitq, thread_is_stopped(thread));
        }
 
        if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
                thread->t_flags = SVC_STOPPING;
                wake_up(&d->opd_pre_waitq);
                wait_event(thread->t_ctl_waitq, thread_is_stopped(thread));
        }
 
-       OBD_FREE_PTR(d->opd_pre);
-       d->opd_pre = NULL;
-
        EXIT;
 }
        EXIT;
 }
-
index 148317f..b21d5af 100644 (file)
@@ -917,6 +917,9 @@ static void osp_sync_process_record(const struct lu_env *env,
                /* cancel any generation record */
                rc = llog_cat_cancel_records(env, cathandle, 1, &cookie);
 
                /* cancel any generation record */
                rc = llog_cat_cancel_records(env, cathandle, 1, &cookie);
 
+               /* flush all pending records ASAP */
+               osp_sync_force(env, d);
+
                RETURN_EXIT;
        }
 
                RETURN_EXIT;
        }
 
index 914f151..2750967 100644 (file)
@@ -1363,7 +1363,7 @@ struct req_format RQF_MDS_GET_ROOT =
 EXPORT_SYMBOL(RQF_MDS_GET_ROOT);
 
 struct req_format RQF_MDS_STATFS =
 EXPORT_SYMBOL(RQF_MDS_GET_ROOT);
 
 struct req_format RQF_MDS_STATFS =
-        DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server);
+       DEFINE_REQ_FMT0("MDS_STATFS", mdt_body_only, obd_statfs_server);
 EXPORT_SYMBOL(RQF_MDS_STATFS);
 
 struct req_format RQF_MDS_SYNC =
 EXPORT_SYMBOL(RQF_MDS_STATFS);
 
 struct req_format RQF_MDS_SYNC =
index 21163e6..e9205ee 100644 (file)
@@ -1758,7 +1758,7 @@ void lustre_swab_obd_statfs (struct obd_statfs *os)
        __swab64s(&os->os_maxbytes);
        __swab32s(&os->os_state);
        __swab32s(&os->os_fprecreated);
        __swab64s(&os->os_maxbytes);
        __swab32s(&os->os_state);
        __swab32s(&os->os_fprecreated);
-       CLASSERT(offsetof(typeof(*os), os_spare2) != 0);
+       __swab32s(&os->os_granted);
        CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
        CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
        CLASSERT(offsetof(typeof(*os), os_spare5) != 0);
        CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
        CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
        CLASSERT(offsetof(typeof(*os), os_spare5) != 0);
index 2883dc9..02b3c96 100644 (file)
@@ -1894,10 +1894,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
                 (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
-       LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
-                (long long)(int)offsetof(struct obd_statfs, os_spare2));
-       LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
-                (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+       LASSERTF((int)offsetof(struct obd_statfs, os_granted) == 112, "found %lld\n",
+                (long long)(int)offsetof(struct obd_statfs, os_granted));
+       LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_granted) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct obd_statfs *)0)->os_granted));
        LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
                 (long long)(int)offsetof(struct obd_statfs, os_spare3));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
        LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
                 (long long)(int)offsetof(struct obd_statfs, os_spare3));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
index aad2aaa..b3419ec 100644 (file)
@@ -991,6 +991,9 @@ int tgt_connect(struct tgt_session_info *tsi)
        reply = req_capsule_server_get(tsi->tsi_pill, &RMF_CONNECT_DATA);
        spin_lock(&tsi->tsi_exp->exp_lock);
        *exp_connect_flags_ptr(tsi->tsi_exp) = reply->ocd_connect_flags;
        reply = req_capsule_server_get(tsi->tsi_pill, &RMF_CONNECT_DATA);
        spin_lock(&tsi->tsi_exp->exp_lock);
        *exp_connect_flags_ptr(tsi->tsi_exp) = reply->ocd_connect_flags;
+       if (reply->ocd_connect_flags & OBD_CONNECT_FLAGS2)
+               *exp_connect_flags2_ptr(tsi->tsi_exp) =
+                       reply->ocd_connect_flags2;
        tsi->tsi_exp->exp_connect_data.ocd_brw_size = reply->ocd_brw_size;
        spin_unlock(&tsi->tsi_exp->exp_lock);
 
        tsi->tsi_exp->exp_connect_data.ocd_brw_size = reply->ocd_brw_size;
        spin_unlock(&tsi->tsi_exp->exp_lock);
 
index 752226a..db720fc 100755 (executable)
@@ -1027,7 +1027,7 @@ test_44a() { # was test_44
                do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701"
                # lctl below may fail, it is valid case
                $LCTL --device $mdcdev recover
                do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701"
                # lctl below may fail, it is valid case
                $LCTL --device $mdcdev recover
-               df $MOUNT
+               $LFS df $MOUNT
        done
        do_facet $SINGLEMDS "lctl set_param fail_loc=0"
        [ $at_max_saved -ne 0 ] && at_max_set $at_max_saved mds
        done
        do_facet $SINGLEMDS "lctl set_param fail_loc=0"
        [ $at_max_saved -ne 0 ] && at_max_set $at_max_saved mds
@@ -3283,13 +3283,19 @@ test_88() { #bug 17485
 }
 run_test 88 "MDS should not assign same objid to different files "
 
 }
 run_test 88 "MDS should not assign same objid to different files "
 
+function calc_osc_kbytes_used() {
+       local kbtotal=$(calc_osc_kbytes kbytestotal)
+       local kbfree=$(calc_osc_kbytes kbytesfree)
+       echo $((kbtotal-kbfree)) # used = total - free, both as reported by calc_osc_kbytes
+}
+
 test_89() {
        cancel_lru_locks osc
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
        rm -f $DIR/$tdir/$tfile
        wait_mds_ost_sync || error "initial MDS-OST sync timed out"
        wait_delete_completed || error "initial wait delete timed out"
 test_89() {
        cancel_lru_locks osc
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
        rm -f $DIR/$tdir/$tfile
        wait_mds_ost_sync || error "initial MDS-OST sync timed out"
        wait_delete_completed || error "initial wait delete timed out"
-       local blocks1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+       local blocks1=$(calc_osc_kbytes_used)
        local write_size=$(fs_log_size)
 
        $SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
        local write_size=$(fs_log_size)
 
        $SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
@@ -3311,7 +3317,7 @@ test_89() {
 
        wait_mds_ost_sync || error "MDS-OST sync timed out"
        wait_delete_completed || error "wait delete timed out"
 
        wait_mds_ost_sync || error "MDS-OST sync timed out"
        wait_delete_completed || error "wait delete timed out"
-       local blocks2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+       local blocks2=$(calc_osc_kbytes_used)
 
        [ $((blocks2 - blocks1)) -le $(fs_log_size)  ] ||
                error $((blocks2 - blocks1)) blocks leaked
 
        [ $((blocks2 - blocks1)) -le $(fs_log_size)  ] ||
                error $((blocks2 - blocks1)) blocks leaked
index 868be7a..f5a0868 100755 (executable)
@@ -10756,13 +10756,19 @@ test_133b() {
                ls -l ${testdir}/${tfile} > /dev/null|| error "ls failed"
                check_stats $SINGLEMDS "getattr" 1
        fi
                ls -l ${testdir}/${tfile} > /dev/null|| error "ls failed"
                check_stats $SINGLEMDS "getattr" 1
        fi
+       rm -rf $DIR/${tdir}
+
+       # when DNE is enabled, MDT uses STATFS RPC to ping other targets
+       # so the check below is not reliable
+       [ $MDSCOUNT -eq 1 ] || return 0
+
        # Sleep to avoid a cached response.
        #define OBD_STATFS_CACHE_SECONDS 1
        sleep 2
        $LFS df || error "lfs failed"
        check_stats $SINGLEMDS "statfs" 1
 
        # Sleep to avoid a cached response.
        #define OBD_STATFS_CACHE_SECONDS 1
        sleep 2
        $LFS df || error "lfs failed"
        check_stats $SINGLEMDS "statfs" 1
 
-       rm -rf $DIR/${tdir}
+       return 0
 }
 run_test 133b "Verifying extra MDT stats =================================="
 
 }
 run_test 133b "Verifying extra MDT stats =================================="
 
index f61f37b..8cb87c0 100644 (file)
@@ -1915,10 +1915,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
                 (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
-       LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
-                (long long)(int)offsetof(struct obd_statfs, os_spare2));
-       LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
-                (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+       LASSERTF((int)offsetof(struct obd_statfs, os_granted) == 112, "found %lld\n",
+                (long long)(int)offsetof(struct obd_statfs, os_granted));
+       LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_granted) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct obd_statfs *)0)->os_granted));
        LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
                 (long long)(int)offsetof(struct obd_statfs, os_spare3));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
        LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
                 (long long)(int)offsetof(struct obd_statfs, os_spare3));
        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",