Whamcloud - gitweb
LU-12998 mds: add no_create parameter to stop creates
authorAndreas Dilger <adilger@whamcloud.com>
Sat, 23 Apr 2022 00:10:36 +0000 (18:10 -0600)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 29 Dec 2023 11:12:43 +0000 (11:12 +0000)
Add a target tunable parameter and mount option "no_create" to
disable new *directory* creation on an MDT.  This sends the
flag OS_STATFS_NOCREATE to the clients, and the DNE MDT space
balance will avoid selecting that MDT when creating a new
subdirectory, without disabling access to existing files/dirs.

This allows "soft disabling" an MDT in advance of storage
upgrades to minimize new directories and files created on that
MDT, reduce future migration, and/or backup/restore workload.

As yet it does not totally disable *file* creation on the MDT,
but it may be extended to do so in the future.

This is analogous to the "no_precreate" option that was added
on the OSTs, and "no_create" has been added to the OSTs for
consistency ("no_precreate" is kept for compatibility for now).

lod_declare_create() checks whether the target MDT of a directory
create is the current MDT.  The two may differ if nocreate is set on
some MDT.  On such a mismatch, call dt_statfs() to fetch the latest
statfs and find out whether nocreate is set on the target MDT.

lmv_create() will choose another MDT if the target MDT has nocreate
set.  In case the flag has since been cleared, it first calls
obd_statfs() to fetch the cached statfs and checks the flag again.

Lustre-change: https://review.whamcloud.com/47124
Lustre-commit: 1dbcd0bab881fac38d8a5e4ef1559f12618f8f0e
Lustre-change: https://review.whamcloud.com/53437
Lustre-commit: 066262a04cb8e0cbf49a20b7bf036d4484399afe (TBD)

Test-Parameters: testlist=conf-sanity env=ONLY=112b,ONLY_REPEAT=50
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I53cfb48ade2f844b18bfc630e7fcea6de9ce7057
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53189
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
23 files changed:
contrib/scripts/spelling.txt
lustre/include/lu_target.h
lustre/include/lustre_disk.h
lustre/include/obd.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lmv/lmv_obd.c
lustre/lod/lod_object.c
lustre/lod/lod_qos.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_lproc.c
lustre/obdclass/lu_tgt_descs.c
lustre/obdclass/obd_mount.c
lustre/ofd/lproc_ofd.c
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_internal.h
lustre/ofd/ofd_obd.c
lustre/osp/osp_precreate.c
lustre/ptlrpc/wiretest.c
lustre/tests/conf-sanity.sh
lustre/tests/test-framework.sh
lustre/utils/lfs.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 2441de9..069da83 100644 (file)
@@ -192,11 +192,12 @@ mti_xattr_buf||mdi_xattr_buf
 nla_strlcpy|nla_strscpy
 OS_STATE_DEGRADED||OS_STATFS_DEGRADED
 OS_STATE_READONLY||OS_STATFS_READONLY
-OS_STATE_NOPRECREATE||OS_STATFS_NOPRECREATE
+OS_STATE_NOPRECREATE||OS_STATFS_NOCREATE
 OS_STATE_ENOSPC||OS_STATFS_ENOSPC
 OS_STATE_ENOINO||OS_STATFS_ENOINO
 OS_STATE_SUM||OS_STATFS_SUM
 OS_STATE_NONROT||OS_STATFS_NONROT
+OS_STATFS_NOPRECREATE||OS_STATFS_NOCREATE
 page_cache_get||get_page
 PAGE_CACHE_MASK||PAGE_MASK
 page_cache_release||put_page
index 39009e4..273f115 100644 (file)
@@ -174,7 +174,8 @@ struct lu_target {
                                 lut_no_reconstruct:1,
                                 /* enforce recovery for local clients */
                                 lut_local_recovery:1,
-                                lut_cksum_t10pi_enforce:1;
+                                lut_cksum_t10pi_enforce:1,
+                                lut_no_create:1;
        /* checksum types supported on this node */
        enum cksum_types         lut_cksum_types_supported;
        /** last_rcvd file */
index ad18b93..b1f5ccc 100644 (file)
@@ -88,7 +88,6 @@ struct lustre_mount_data {
        char   *lmd_nidnet;     /* network to restrict this client to */
 };
 
-#define LMD_FLG_SERVER         0x0001  /* Mounting a server */
 #define LMD_FLG_CLIENT         0x0002  /* Mounting a client */
 #define LMD_FLG_SKIP_LFSCK     0x0004  /* NOT auto resume LFSCK when mount */
 #define LMD_FLG_ABORT_RECOV    0x0008  /* Abort recovery */
@@ -100,13 +99,12 @@ struct lustre_mount_data {
 #define LMD_FLG_NOIR           0x0080  /* NO imperative recovery */
 #define LMD_FLG_NOSCRUB                0x0100  /* Do not trigger scrub automatically */
 #define LMD_FLG_MGS            0x0200  /* Also start MGS along with server */
-#define LMD_FLG_IAM            0x0400  /* IAM dir */
 #define LMD_FLG_NO_PRIMNODE    0x0800  /* all nodes are service nodes */
 #define LMD_FLG_VIRGIN         0x1000  /* the service registers first time */
 #define LMD_FLG_UPDATE         0x2000  /* update parameters */
 #define LMD_FLG_HSM            0x4000  /* Start coordinator */
 #define LMD_FLG_DEV_RDONLY     0x8000  /* discard modification quitely */
-#define LMD_FLG_NO_PRECREATE   0x10000 /* do not allow OST object creation */
+#define LMD_FLG_NO_CREATE      0x10000 /* do not allow OST object creation */
 #define LMD_FLG_LOCAL_RECOV    0x20000 /* force recovery for local clients */
 #define LMD_FLG_ABORT_RECOV_MDT        0x40000 /* Abort recovery between MDTs */
 
index b7bd835..0ddf8e8 100644 (file)
@@ -451,7 +451,7 @@ struct lmv_obd {
        struct kobject          *lmv_tgts_kobj;
        void                    *lmv_cache;
 
-       __u32                   lmv_qos_rr_index;
+       __u32                   lmv_qos_rr_index; /* next round-robin MDT idx */
 };
 
 #define lmv_mdt_count  lmv_mdt_descs.ltd_lmv_desc.ld_tgt_count
index 9d8b626..e667dd4 100644 (file)
@@ -263,7 +263,7 @@ typedef struct statx lstatx_t;
 enum obd_statfs_state {
        OS_STATFS_DEGRADED      = 0x00000001, /**< RAID degraded/rebuilding */
        OS_STATFS_READONLY      = 0x00000002, /**< filesystem is read-only */
-       OS_STATFS_NOPRECREATE   = 0x00000004, /**< no object precreation */
+       OS_STATFS_NOCREATE      = 0x00000004, /**< no object creation */
        OS_STATFS_UNUSED1       = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
        OS_STATFS_UNUSED2       = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
        OS_STATFS_ENOSPC        = 0x00000020, /**< not enough free space */
@@ -271,6 +271,9 @@ enum obd_statfs_state {
        OS_STATFS_SUM           = 0x00000100, /**< aggregated for all tagrets */
        OS_STATFS_NONROT        = 0x00000200, /**< non-rotational device */
 };
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0)
+#define OS_STATFS_NOPRECREATE OS_STATFS_NOCREATE
+#endif
 
 /** filesystem statistics/attributes for target device */
 struct obd_statfs {
index d6e0f13..6f36d42 100644 (file)
@@ -566,6 +566,16 @@ static int lmv_disconnect(struct obd_export *exp)
        RETURN(rc);
 }
 
+static void lmv_statfs_update(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt,
+                             struct obd_statfs *osfs)
+{
+       spin_lock(&lmv->lmv_lock);
+       tgt->ltd_statfs = *osfs;
+       tgt->ltd_statfs_age = ktime_get_seconds();
+       spin_unlock(&lmv->lmv_lock);
+       set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags);
+}
+
 static int lmv_fid2path(struct obd_export *exp, int len, void *karg,
                        void __user *uarg)
 {
@@ -894,9 +904,9 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
                                0);
                if (rc)
                        RETURN(rc);
+               lmv_statfs_update(lmv, tgt, &stat_buf);
                if (copy_to_user(data->ioc_pbuf1, &stat_buf,
-                                min((int) data->ioc_plen1,
-                                    (int) sizeof(stat_buf))))
+                                min_t(int, data->ioc_plen1, sizeof(stat_buf))))
                        RETURN(-EFAULT);
                break;
        }
@@ -1346,7 +1356,7 @@ out_free_temp:
        return rc;
 }
 
-static int lmv_statfs_update(void *cookie, int rc)
+static int lmv_statfs_cb(void *cookie, int rc)
 {
        struct obd_info *oinfo = cookie;
        struct obd_device *obd = oinfo->oi_obd;
@@ -1358,13 +1368,8 @@ static int lmv_statfs_update(void *cookie, int rc)
         * NB: don't deactivate TGT upon error, because we may not trigger async
         * statfs any longer, then there is no chance to activate TGT.
         */
-       if (!rc) {
-               spin_lock(&lmv->lmv_lock);
-               tgt->ltd_statfs = *osfs;
-               tgt->ltd_statfs_age = ktime_get_seconds();
-               spin_unlock(&lmv->lmv_lock);
-               set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags);
-       }
+       if (!rc)
+               lmv_statfs_update(lmv, tgt, osfs);
 
        return rc;
 }
@@ -1375,7 +1380,7 @@ int lmv_statfs_check_update(struct obd_device *obd, struct lmv_tgt_desc *tgt)
        struct obd_info oinfo = {
                .oi_obd = obd,
                .oi_tgt = tgt,
-               .oi_cb_up = lmv_statfs_update,
+               .oi_cb_up = lmv_statfs_cb,
        };
        int rc;
 
@@ -1538,7 +1543,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
                GOTO(unlock, tgt = ERR_PTR(rc));
 
        lmv_foreach_tgt(lmv, tgt) {
-               if (!tgt->ltd_exp || !tgt->ltd_active) {
+               if (!tgt->ltd_exp || !tgt->ltd_active ||
+                   (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) {
                        tgt->ltd_qos.ltq_usable = 0;
                        continue;
                }
@@ -1555,7 +1561,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
                total_usable++;
        }
 
-       /* If current MDT has above-average space and dir is not aleady using
+       /* If current MDT has above-average space and dir is not already using
         * round-robin to spread across more MDTs, stay on the parent MDT
         * to avoid creating needless remote MDT directories.  Remote dirs
         * close to the root balance space more effectively than bottom dirs,
@@ -1610,7 +1616,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv)
                index = (i + lmv->lmv_qos_rr_index) %
                        lmv->lmv_mdt_descs.ltd_tgts_size;
                tgt = lmv_tgt(lmv, index);
-               if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
+               if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
+                   (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE))
                        continue;
 
                lmv->lmv_qos_rr_index = (tgt->ltd_index + 1) %
@@ -1643,7 +1650,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_lf(struct lmv_obd *lmv)
                GOTO(unlock, tgt = ERR_PTR(-EAGAIN));
 
        lmv_foreach_tgt(lmv, tgt) {
-               if (!tgt->ltd_exp || !tgt->ltd_active) {
+               if (!tgt->ltd_exp || !tgt->ltd_active ||
+                   (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) {
                        tgt->ltd_qos.ltq_usable = 0;
                        continue;
                }
@@ -1734,7 +1742,7 @@ lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_object *lso,
  * which is set outside, and if dir is migrating, 'op_data->op_new_layout'
  * indicates whether old or new layout is used to locate.
  *
- * For plain direcotry, it just locate the MDT of op_data->op_fid1.
+ * For plain directory, it just locate the MDT of op_data->op_fid1.
  *
  * \param[in] lmv              LMV device
  * \param[in/out] op_data      client MD stack parameters, name, namelen etc,
@@ -1950,7 +1958,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv,
        if (tgt == ERR_PTR(-EAGAIN)) {
                if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) &&
                    !lmv_op_default_rr_mkdir(op_data) &&
-                   !lmv_op_user_qos_mkdir(op_data))
+                   !lmv_op_user_qos_mkdir(op_data) &&
+                   !(tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE))
                        /* if not necessary, don't create remote directory. */
                        tgt = tmp;
                else
@@ -1962,9 +1971,26 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv,
        if (!IS_ERR(tgt))
                op_data->op_mds = tgt->ltd_index;
 
+       /* If space balance was called because the original target was marked
+        * NOCREATE, periodically check whether the state has changed.
+        */
+       if (tmp != tgt && tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE)
+               lmv_statfs_check_update(lmv2obd_dev(lmv), tmp);
+
        return tgt;
 }
 
+static bool lmv_tgt_nocreate(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt)
+{
+       if (likely(!(tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)))
+               return false;
+
+       obd_statfs(NULL, tgt->ltd_exp, &tgt->ltd_statfs,
+                  ktime_get_seconds() -
+                       lmv->lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage, 0);
+       return tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE;
+}
+
 int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                const void *data, size_t datalen, umode_t mode, uid_t uid,
                gid_t gid, kernel_cap_t cap_effective, __u64 rdev,
@@ -2005,6 +2031,9 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
         * 2. is "lfs mkdir -i -1"? mkdir by space usage.
         * 3. is starting MDT specified in default LMV? mkdir on MDT N.
         * 4. is default LMV space balanced? mkdir by space usage.
+        *
+        * If the existing parent or specific MDT selected is deactivated
+        * with OS_STATFS_NOCREATE then select a different MDT by QOS.
         */
        if (lmv_op_user_specific_mkdir(op_data)) {
                struct lmv_user_md *lum = op_data->op_data;
@@ -2013,6 +2042,8 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                tgt = lmv_tgt(lmv, op_data->op_mds);
                if (!tgt)
                        RETURN(-ENODEV);
+               if (unlikely(lmv_tgt_nocreate(lmv, tgt)))
+                       GOTO(new_tgt, -EAGAIN);
        } else if (lmv_op_user_qos_mkdir(op_data)) {
                tgt = lmv_locate_tgt_by_space(lmv, op_data, tgt);
                if (IS_ERR(tgt))
@@ -2024,7 +2055,11 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                tgt = lmv_tgt(lmv, op_data->op_mds);
                if (!tgt)
                        RETURN(-ENODEV);
-       } else if (lmv_op_default_qos_mkdir(op_data)) {
+               if (unlikely(lmv_tgt_nocreate(lmv, tgt)))
+                       GOTO(new_tgt, -EAGAIN);
+       } else if (lmv_op_default_qos_mkdir(op_data) ||
+                  unlikely(lmv_tgt_nocreate(lmv, tgt))) {
+new_tgt:
                tgt = lmv_locate_tgt_by_space(lmv, op_data, tgt);
                if (IS_ERR(tgt))
                        RETURN(PTR_ERR(tgt));
index d9ab18f..6932bbf 100644 (file)
@@ -2142,6 +2142,9 @@ static int lod_mdt_alloc_specific(const struct lu_env *env,
                                /* this OSP doesn't feel well */
                                continue;
 
+                       if (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)
+                               continue;
+
                        rc = dt_fid_alloc(env, tgt_dt, &fid, NULL, NULL);
                        if (rc < 0)
                                continue;
@@ -5912,49 +5915,42 @@ static int lod_declare_create(const struct lu_env *env, struct dt_object *dt,
                struct lu_buf buf = { NULL };
                struct lu_buf *lmu = NULL;
 
-               ss = lu_site2seq(dt->do_lu.lo_dev->ld_site);
+               if (hint && !hint->dah_eadata &&
+                   CFS_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT)) {
+                       GOTO(out, rc = -EREMOTE);
+               } else if (hint && hint->dah_eadata) {
+                       buf.lb_buf = (void *)hint->dah_eadata;
+                       buf.lb_len = hint->dah_eadata_len;
+                       lmu = &buf;
+               }
 
-               /* If the parent has default stripeEA, and client
-                * did not find it before sending create request,
-                * then MDT will return -EREMOTE, and client will
-                * retrieve the default stripeEA and re-create the
-                * sub directory.
-                *
-                * Note: if dah_eadata != NULL, it means creating the
-                * striped directory with specified stripeEA, then it
-                * should ignore the default stripeEA */
-               if (hint != NULL && hint->dah_eadata == NULL) {
-                       if (OBD_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT))
-                               GOTO(out, rc = -EREMOTE);
-
-                       if (lo->ldo_dir_stripe_offset != LMV_OFFSET_DEFAULT &&
-                           lo->ldo_dir_stripe_offset != ss->ss_node_id) {
-                               struct lod_device *lod;
-                               struct lu_tgt_desc *mdt = NULL;
-                               bool found_mdt = false;
-
-                               lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
-                               lod_foreach_mdt(lod, mdt) {
-                                       if (mdt->ltd_index ==
-                                               lo->ldo_dir_stripe_offset) {
-                                               found_mdt = true;
-                                               break;
-                                       }
+               /* if dir target MDT is not current MDT, it's possible that
+                * directory creation is disabled on the target MDT.
+                */
+               ss = lu_site2seq(dt->do_lu.lo_dev->ld_site);
+               if (lo->ldo_dir_stripe_offset != LMV_OFFSET_DEFAULT &&
+                   lo->ldo_dir_stripe_offset != ss->ss_node_id) {
+                       struct lod_device *lod;
+                       struct lu_tgt_desc *mdt;
+                       bool no_create = false;
+
+                       lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+                       rc = -EINVAL;
+                       lod_foreach_mdt(lod, mdt) {
+                               if (mdt->ltd_index ==
+                                   lo->ldo_dir_stripe_offset) {
+                                       rc = -EPROTO;
+                                       /* refresh statfs */
+                                       dt_statfs(env, mdt->ltd_tgt,
+                                                 &mdt->ltd_statfs);
+                                       no_create = (mdt->ltd_statfs.os_state &
+                                                    OS_STATFS_NOCREATE);
+                                       break;
                                }
-
-                               /* If the MDT indicated by stripe_offset can be
-                                * found, then tell client to resend the create
-                                * request to the correct MDT, otherwise return
-                                * error to client */
-                               if (found_mdt)
-                                       GOTO(out, rc = -EREMOTE);
-                               else
-                                       GOTO(out, rc = -EINVAL);
                        }
-               } else if (hint && hint->dah_eadata) {
-                       lmu = &buf;
-                       lmu->lb_buf = (void *)hint->dah_eadata;
-                       lmu->lb_len = hint->dah_eadata_len;
+
+                       if (!no_create)
+                               GOTO(out, rc);
                }
 
                rc = lod_declare_dir_striping_create(env, dt, attr, lmu, dof,
@@ -6708,9 +6704,11 @@ static bool lod_sel_osts_allowed(const struct lu_env *env,
 
                if (sfs->os_state & OS_STATFS_ENOSPC ||
                    sfs->os_state & OS_STATFS_READONLY ||
+                   sfs->os_state & OS_STATFS_NOCREATE ||
                    sfs->os_state & OS_STATFS_DEGRADED) {
-                       CDEBUG(D_LAYOUT, "ost %d is not availble for SEL "
-                              "extension, state %u\n", index, sfs->os_state);
+                       CDEBUG(D_LAYOUT,
+                              "OST%04x unusable for SEL extension, state %x\n",
+                              index, sfs->os_state);
                        ret = false;
                        break;
                }
index 3f1f477..a202441 100644 (file)
@@ -76,8 +76,8 @@ static inline int lod_statfs_check(struct lu_tgt_descs *ltd,
        if (sfs->os_state & OS_STATFS_READONLY)
                return -EROFS;
 
-       /* object precreation is skipped on targets with max_create_count=0 */
-       if (sfs->os_state & OS_STATFS_NOPRECREATE)
+       /* object creation is skipped on the OST with max_create_count=0 */
+       if (!ltd->ltd_is_mdt && sfs->os_state & OS_STATFS_NOCREATE)
                return -ENOBUFS;
 
        return 0;
@@ -1014,6 +1014,9 @@ repeat_find:
                if (lod_statfs_check(ltd, mdt))
                        continue;
 
+               if (mdt->ltd_statfs.os_state & OS_STATFS_NOCREATE)
+                       continue;
+
                /* try to use another OSP if this one is degraded */
                if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED &&
                    !use_degraded) {
@@ -1874,7 +1877,8 @@ int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                if (mdt->ltd_discon || lod_statfs_check(ltd, mdt))
                        continue;
 
-               if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED)
+               if (mdt->ltd_statfs.os_state &
+                   (OS_STATFS_DEGRADED | OS_STATFS_NOCREATE))
                        continue;
 
                mdt->ltd_qos.ltq_usable = 1;
index de86d74..afc256f 100644 (file)
@@ -553,6 +553,8 @@ static int mdt_statfs(struct tgt_session_info *tsi)
                                   osfs->os_bsize - 1) >> tgd->tgd_blockbits));
 
        tgt_grant_sanity_check(mdt->mdt_lu_dev.ld_obd, __func__);
+       if (mdt->mdt_lut.lut_no_create)
+               osfs->os_state |= OS_STATFS_NOCREATE;
        CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; "
               "%llu objects: %llu free; state %x\n",
               osfs->os_blocks, osfs->os_bfree, osfs->os_bavail,
@@ -6037,6 +6039,8 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                obd->u.obt.obt_magic = OBT_MAGIC;
                if (lsi->lsi_lmd->lmd_flags & LMD_FLG_SKIP_LFSCK)
                        m->mdt_skip_lfsck = 1;
+               if (lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_CREATE)
+                       m->mdt_lut.lut_no_create = 1;
        }
 
        /* Just try to get a DoM lock by default. Otherwise, having a group
index 0660b6a..5ee90b1 100644 (file)
@@ -771,6 +771,55 @@ MDT_BOOL_RW_ATTR(enable_strict_som);
 MDT_BOOL_RW_ATTR(enable_dmv_xattr);
 
 /**
+ * Show if the MDT is in no create mode.
+ *
+ * This means MDT has been administratively disabled to prevent it
+ * from creating any new directories on the MDT, though existing files
+ * and directories can still be read, written, and unlinked.
+ *
+ * \retval             number of bytes written
+ */
+static ssize_t no_create_show(struct kobject *kobj, struct attribute *attr,
+                             char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n", mdt->mdt_lut.lut_no_create);
+}
+
+/**
+ * Set MDT to no create mode.
+ *
+ * This is used to interface to userspace administrative tools to
+ * disable new directory creation on the MDT.
+ *
+ * \param[in] count    \a buffer length
+ *
+ * \retval             \a count on success
+ * \retval             negative number on error
+ */
+static ssize_t no_create_store(struct kobject *kobj, struct attribute *attr,
+                              const char *buffer, size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       mdt->mdt_lut.lut_no_create = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(no_create);
+
+/**
  * Show MDT async commit count.
  *
  * @m          seq_file handle
@@ -1303,6 +1352,7 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_enable_striped_dir.attr,
        &lustre_attr_commit_on_sharing.attr,
        &lustre_attr_local_recovery.attr,
+       &lustre_attr_no_create.attr,
        &lustre_attr_async_commit_count.attr,
        &lustre_attr_sync_count.attr,
        &lustre_attr_dom_lock.attr,
index 5bd9d3a..59accbb 100644 (file)
@@ -429,7 +429,7 @@ EXPORT_SYMBOL(ltd_del_tgt);
  * Calculate penalties per-tgt and per-server
  *
  * Re-calculate penalties when the configuration changes, active targets
- * change and after statfs refresh (all these are reflected by lq_dirty flag).
+ * change and after statfs refresh (all these are reflected by LQ_DIRTY flag).
  * On every tgt and server: decay the penalty by half for every 8x the update
  * interval that the device has been idle. That gives lots of time for the
  * statfs information to be updated (which the penalty is only a proxy for),
index 4412638..d0c6316 100644 (file)
@@ -1361,8 +1361,10 @@ int lmd_parse(char *options, struct lustre_mount_data *lmd)
                                max_t(int, simple_strtoul(s1 + 19, NULL, 10),
                                      time_min);
                        clear++;
-               } else if (strncmp(s1, "no_precreate", 12) == 0) {
-                       lmd->lmd_flags |= LMD_FLG_NO_PRECREATE;
+               } else if (strncmp(s1, "no_create", 9) == 0 ||
+                          /* no_precreate kept for 2.16 compatibility */
+                          strncmp(s1, "no_precreate", 12) == 0) {
+                       lmd->lmd_flags |= LMD_FLG_NO_CREATE;
                        clear++;
                } else if (strncmp(s1, "noir", 4) == 0) {
                        lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
index 750d877..0940c1c 100644 (file)
@@ -298,18 +298,18 @@ LUSTRE_RW_ATTR(degraded);
  *
  * \retval             number of bytes written
  */
-static ssize_t no_precreate_show(struct kobject *kobj, struct attribute *attr,
+static ssize_t no_create_show(struct kobject *kobj, struct attribute *attr,
                                 char *buf)
 {
        struct obd_device *obd = container_of(kobj, struct obd_device,
                                              obd_kset.kobj);
        struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
 
-       return snprintf(buf, PAGE_SIZE, "%u\n", ofd->ofd_no_precreate);
+       return scnprintf(buf, PAGE_SIZE, "%u\n", ofd->ofd_lut.lut_no_create);
 }
 
 /**
- * Set OFD to no precreate mode.
+ * Set OFD to no create mode.
  *
  * This is used to interface to userspace administrative tools to
  * disable new object creation on the OST.
@@ -319,7 +319,7 @@ static ssize_t no_precreate_show(struct kobject *kobj, struct attribute *attr,
  * \retval             \a count on success
  * \retval             negative number on error
  */
-static ssize_t no_precreate_store(struct kobject *kobj, struct attribute *attr,
+static ssize_t no_create_store(struct kobject *kobj, struct attribute *attr,
                                  const char *buffer, size_t count)
 {
        struct obd_device *obd = container_of(kobj, struct obd_device,
@@ -333,12 +333,19 @@ static ssize_t no_precreate_store(struct kobject *kobj, struct attribute *attr,
                return rc;
 
        spin_lock(&ofd->ofd_flags_lock);
-       ofd->ofd_no_precreate = val;
+       ofd->ofd_lut.lut_no_create = val;
        spin_unlock(&ofd->ofd_flags_lock);
 
        return count;
 }
+LUSTRE_RW_ATTR(no_create);
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0)
+/* compatibility entry for a few releases */
+#define no_precreate_show no_create_show
+#define no_precreate_store no_create_store
 LUSTRE_RW_ATTR(no_precreate);
+#endif
 
 /**
  * Show OFD filesystem type.
@@ -1096,32 +1103,35 @@ void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset,
 LPROC_SEQ_FOPS(lprocfs_nid_stats_clear);
 
 static struct attribute *ofd_attrs[] = {
-       &lustre_attr_tot_dirty.attr,
-       &lustre_attr_tot_granted.attr,
-       &lustre_attr_tot_pending.attr,
+       &lustre_attr_access_log_mask.attr,
+       &lustre_attr_access_log_size.attr,
+       &lustre_attr_atime_diff.attr,
+       &lustre_attr_checksum_t10pi_enforce.attr,
+       &lustre_attr_degraded.attr,
+       &lustre_attr_fstype.attr,
        &lustre_attr_grant_compat_disable.attr,
+       &lustre_attr_grant_precreate.attr,
        &lustre_attr_instance.attr,
-       &lustre_attr_recovery_time_hard.attr,
-       &lustre_attr_recovery_time_soft.attr,
        &lustre_attr_ir_factor.attr,
+       &lustre_attr_job_cleanup_interval.attr,
+       &lustre_attr_lfsck_speed_limit.attr,
+       &lustre_attr_no_create.attr,
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0)
+       &lustre_attr_no_precreate.attr,
+#endif
        &lustre_attr_num_exports.attr,
-       &lustre_attr_seqs_allocated.attr,
-       &lustre_attr_grant_precreate.attr,
        &lustre_attr_precreate_batch.attr,
-       &lustre_attr_atime_diff.attr,
-       &lustre_attr_degraded.attr,
-       &lustre_attr_fstype.attr,
-       &lustre_attr_no_precreate.attr,
+       &lustre_attr_recovery_time_hard.attr,
+       &lustre_attr_recovery_time_soft.attr,
+       &lustre_attr_seqs_allocated.attr,
+       &lustre_attr_tot_dirty.attr,
+       &lustre_attr_tot_granted.attr,
+       &lustre_attr_tot_pending.attr,
+       &lustre_attr_soft_sync_limit.attr,
        &lustre_attr_sync_journal.attr,
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
        &lustre_attr_sync_on_lock_cancel.attr,
 #endif
-       &lustre_attr_soft_sync_limit.attr,
-       &lustre_attr_lfsck_speed_limit.attr,
-       &lustre_attr_access_log_mask.attr,
-       &lustre_attr_access_log_size.attr,
-       &lustre_attr_job_cleanup_interval.attr,
-       &lustre_attr_checksum_t10pi_enforce.attr,
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 15, 53, 0)
        &lustre_attr_read_cache_enable.attr,
        &lustre_attr_readcache_max_filesize.attr,
index dcaa49e..7081caa 100644 (file)
@@ -187,8 +187,8 @@ static int ofd_stack_init(const struct lu_env *env,
        if (lmd) {
                if (lmd->lmd_flags & LMD_FLG_SKIP_LFSCK)
                        m->ofd_skip_lfsck = 1;
-               if (lmd->lmd_flags & LMD_FLG_NO_PRECREATE)
-                       m->ofd_no_precreate = 1;
+               if (lmd->lmd_flags & LMD_FLG_NO_CREATE)
+                       m->ofd_lut.lut_no_create = 1;
                *lmd_flags = lmd->lmd_flags;
        }
 
@@ -1505,7 +1505,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_EROFS))
                RETURN(-EROFS);
 
-       if (ofd->ofd_no_precreate)
+       if (ofd->ofd_lut.lut_no_create)
                return -EPERM;
 
        repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
index bf78e70..508d968 100644 (file)
@@ -144,7 +144,6 @@ struct ofd_device {
                                 ofd_lastid_rebuilding:1,
                                 ofd_record_fid_accessed:1,
                                 ofd_lfsck_verify_pfid:1,
-                                ofd_no_precreate:1,
                                 ofd_skip_lfsck:1;
        struct seq_server_site   ofd_seq_site;
        /* the limit of SOFT_SYNC RPCs that will trigger a soft sync */
index 556621b..b072f9e 100644 (file)
@@ -761,12 +761,12 @@ int ofd_statfs(const struct lu_env *env,  struct obd_export *exp,
                osfs->os_ffree -= osfs->os_ffree;
        }
 
-       /* OS_STATFS_READONLY can be set by OSD already */
+       /* OS_STATFS_READONLY can be set by OSD already, only add flags */
        if (ofd->ofd_raid_degraded)
                osfs->os_state |= OS_STATFS_DEGRADED;
 
-       if (ofd->ofd_no_precreate)
-               osfs->os_state |= OS_STATFS_NOPRECREATE;
+       if (ofd->ofd_lut.lut_no_create)
+               osfs->os_state |= OS_STATFS_NOCREATE;
 
        if (obd->obd_self_export != exp && !exp_grant_param_supp(exp) &&
            tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) {
@@ -1023,7 +1023,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp,
 
        ENTRY;
 
-       if (ofd->ofd_no_precreate)
+       if (ofd->ofd_lut.lut_no_create)
                return -EPERM;
 
        ofd_info_init(env, exp);
index 4e7034c..4feecaf 100644 (file)
@@ -1108,7 +1108,7 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs)
 
        /* Object precreation skipped on OST if manually disabled */
        if (d->opd_pre_max_create_count == 0)
-               msfs->os_state |= OS_STATFS_NOPRECREATE;
+               msfs->os_state |= OS_STATFS_NOCREATE;
        /* else don't clear flags in new msfs->os_state sent from OST */
 
 update:
index 4f10a37..3fac4c1 100644 (file)
@@ -2205,8 +2205,8 @@ void lustre_assert_wire_constants(void)
                (unsigned)OS_STATFS_DEGRADED);
        LASSERTF(OS_STATFS_READONLY == 0x00000002UL, "found 0x%.8xUL\n",
                (unsigned)OS_STATFS_READONLY);
-       LASSERTF(OS_STATFS_NOPRECREATE == 0x00000004UL, "found 0x%.8xUL\n",
-               (unsigned)OS_STATFS_NOPRECREATE);
+       LASSERTF(OS_STATFS_NOCREATE == 0x00000004UL, "found 0x%.8xUL\n",
+               (unsigned)OS_STATFS_NOCREATE);
        LASSERTF(OS_STATFS_ENOSPC == 0x00000020UL, "found 0x%.8xUL\n",
                (unsigned)OS_STATFS_ENOSPC);
        LASSERTF(OS_STATFS_ENOINO == 0x00000040UL, "found 0x%.8xUL\n",
index 68192c9..1ce4994 100644 (file)
@@ -133,12 +133,15 @@ stop_mdt() {
 }
 
 start_mds() {
+       local mdscount=$MDSCOUNT
        local num
 
-       for num in $(seq $MDSCOUNT); do
+       [[ "$1" == "--mdscount" ]] && mdscount=$2 && shift 2
+
+       for ((num=1; num <= $mdscount; num++ )); do
                start_mdt $num $@ || return 94
        done
-       for num in $(seq $MDSCOUNT); do
+       for ((num=1; num <= $mdscount; num++ )); do
                wait_clients_import_state ${CLIENTS:-$HOSTNAME} mds${num} FULL
        done
 }
@@ -8614,15 +8617,22 @@ test_111() {
 }
 run_test 111 "Adding large_dir with over 2GB directory"
 
-test_112() {
+test_112a() {
+       local param="no_create"
+
+       (( $OST1_VERSION > $(version_code 2.14.0) )) ||
+               skip "need OSS at least 2.14.0"
+       (( $OST1_VERSION >= $(version_code 2.14.0.115) )) ||
+               param="no_precreate"
+
        start_mds || error "MDS start failed"
        start_ost || error "OSS start failed"
        echo "start ost2 service on $(facet_active_host ost2)"
-       start ost2 $(ostdevname 2) $(csa_add "$OST_MOUNT_OPTS" -o no_precreate) ||
+       start ost2 $(ostdevname 2) $(csa_add "$OST_MOUNT_OPTS" -o $param) ||
                error "start ost2 facet failed"
        local val=$(do_facet ost2 \
-                  "$LCTL get_param -n obdfilter.$FSNAME-OST0001*.no_precreate")
-       (( $val == 1 )) || error "obdfilter.$FSNAME-OST0001*.no_precreate=$val"
+                  "$LCTL get_param -n obdfilter.$FSNAME-OST0001*.$param")
+       (( $val == 1 )) || error "obdfilter.$FSNAME-OST0001*.$param=$val"
 
        mount_client $MOUNT || error "mount client failed"
        wait_osc_import_state mds1 ost1 FULL
@@ -8634,7 +8644,11 @@ test_112() {
        $LFS setstripe -i 1 $DIR/$tfile.1 && $LFS getstripe $DIR/$tfile.1 &&
                (( $($LFS getstripe -i $DIR/$tfile.1) == 1 )) &&
                error "allowed to create $tfile.1 on OST0001"
-       do_facet ost2 $LCTL set_param obdfilter.*.no_precreate=0
+       $LFS df -v $MOUNT
+       $LFS df -v $MOUNT | grep -q "OST:1.*N" ||
+               error "NOCREATE not in 'lfs df'"
+
+       do_facet ost2 $LCTL set_param obdfilter.$FSNAME-OST0001*.$param=0
        sleep_maxage
        $LFS setstripe -i 1 $DIR/$tfile.2 ||
                error "failed to create $tfile.2 on ost1 facet"
@@ -8643,7 +8657,55 @@ test_112() {
        stop_ost2 || error "stop ost2 facet failed"
        cleanup
 }
-run_test 112 "mount OST with nocreate option"
+run_test 112a "mount OST with no_create option"
+
+test_112b() {
+       (( MDSCOUNT >= 2 )) || skip "need at least 2 MDTs"
+       (( $MDS1_VERSION >= $(version_code 2.14.0.115) )) ||
+               skip "need MDS >= 2.14.0.115"
+       local mdsnum=$MDSCOUNT
+       local facet=mds$mdsnum
+       local mdtidx=$((mdsnum - 1))
+       local mdtname=$FSNAME-MDT$(printf %04x $mdtidx)
+
+       start_mds --mdscount $((mdsnum - 1)) || error "MDS start failed"
+       start_mdt $mdsnum -o no_create || error "start $facet failed"
+       local val=$(do_facet $facet \
+                  "$LCTL get_param -n mdt.$mdtname*.no_create")
+       (( $val == 1 )) || error "mdt.$mdtname*.no_create=$val"
+       start_ost || error "ost1 start failed"
+       start_ost2 || error "ost2 start failed"
+
+       mount_client $MOUNT || error "mount client failed"
+       wait_osc_import_ready $facet ost2
+
+       $LFS df -v $MOUNT
+       $LFS df -v $MOUNT | grep -q "MDT:$mdtidx.*N" ||
+               error "NOCREATE not in 'lfs df'"
+
+       $LFS mkdir -i $mdtidx $DIR/$tdir ||
+               $LFS setdirstripe -D -c 1 -i -1 --max-inherit-rr 2 $DIR/$tdir ||
+               error "error creating $tdir on $mdtname"
+       stack_trap "rm -rf $DIR/$tdir"
+
+       mkdir $DIR/$tdir/d1.{1..100} || error "mkdir $tdir/d1.{1..100} failed"
+       $LFS getdirstripe -i $DIR/$tdir/d1.* | sort | uniq -c
+       do_facet $facet $LCTL set_param mdt.$mdtname*.no_create=0
+       # allow one initial create for delayed statfs on client
+       (( $($LFS getdirstripe -i $DIR/$tdir/d1.* | grep -c $mdtidx) < 2 )) ||
+               error "allowed create on $mdtname"
+       sleep_maxage_lmv
+
+       mkdir $DIR/$tdir/d2.{1..100} || error "mkdir $tdir/d2.{1..100} failed"
+       $LFS getdirstripe -i $DIR/$tdir/d2.{1..100} | sort | uniq -c
+       (( $($LFS getdirstripe -i $DIR/$tdir/d2.* | grep -c $mdtidx) > 10 )) ||
+               error "no create on $mdtname"
+       # files not cleaned with ONLY_REPEAT because of client unmount below
+       rm -r $DIR/$tdir
+       stop_ost2 || error "stop ost2 facet failed"
+       cleanup
+}
+run_test 112b "mount MDT with no_create option"
 
 # Global for 113
 SAVE_MGS_MOUNT_OPTS=$MGS_MOUNT_OPTS
index 1b40b64..37d08b8 100755 (executable)
@@ -11227,8 +11227,14 @@ rmultiop_stop() {
 }
 
 sleep_maxage() {
-       local delay=$(do_facet $SINGLEMDS lctl get_param -n lo[vd].*.qos_maxage |
-                     awk '{ print $1 * 2; exit; }')
+       local delay=$(do_facet mds1 lctl get_param -n lod.*.qos_maxage |
+                     awk '{ print $1 + 5; exit; }')
+       sleep $delay
+}
+
+sleep_maxage_lmv() {
+       local delay=$(lctl get_param -n lmv.*.qos_maxage |
+                     awk '{ print $1 + 5; exit; }')
        sleep $delay
 }
 
index 9c6be34..e06359e 100644 (file)
@@ -6769,7 +6769,7 @@ static struct obd_statfs_state_names {
 } oss_names[] = {
        { .osn_state = OS_STATFS_DEGRADED,   .osn_name = 'D', .osn_err = true },
        { .osn_state = OS_STATFS_READONLY,   .osn_name = 'R', .osn_err = true },
-       { .osn_state = OS_STATFS_NOPRECREATE,.osn_name = 'N', .osn_err = true },
+       { .osn_state = OS_STATFS_NOCREATE,   .osn_name = 'N', .osn_err = true },
        { .osn_state = OS_STATFS_UNUSED1,    .osn_name = '?', .osn_err = true },
        { .osn_state = OS_STATFS_UNUSED2,    .osn_name = '?', .osn_err = true },
        { .osn_state = OS_STATFS_ENOSPC,     .osn_name = 'S', .osn_err = true },
index 7d2a380..f7cc11b 100644 (file)
@@ -1044,7 +1044,7 @@ check_obd_statfs(void)
 
        CHECK_VALUE_X(OS_STATFS_DEGRADED);
        CHECK_VALUE_X(OS_STATFS_READONLY);
-       CHECK_VALUE_X(OS_STATFS_NOPRECREATE);
+       CHECK_VALUE_X(OS_STATFS_NOCREATE);
        CHECK_VALUE_X(OS_STATFS_ENOSPC);
        CHECK_VALUE_X(OS_STATFS_ENOINO);
        CHECK_VALUE_X(OS_STATFS_SUM);
index a987dcd..ea9505c 100644 (file)
@@ -2241,8 +2241,8 @@ void lustre_assert_wire_constants(void)
                (unsigned)OS_STATFS_DEGRADED);
        LASSERTF(OS_STATFS_READONLY == 0x00000002UL, "found 0x%.8xUL\n",
                (unsigned)OS_STATFS_READONLY);
-       LASSERTF(OS_STATFS_NOPRECREATE == 0x00000004UL, "found 0x%.8xUL\n",
-               (unsigned)OS_STATFS_NOPRECREATE);
+       LASSERTF(OS_STATFS_NOCREATE == 0x00000004UL, "found 0x%.8xUL\n",
+               (unsigned)OS_STATFS_NOCREATE);
        LASSERTF(OS_STATFS_ENOSPC == 0x00000020UL, "found 0x%.8xUL\n",
                (unsigned)OS_STATFS_ENOSPC);
        LASSERTF(OS_STATFS_ENOINO == 0x00000040UL, "found 0x%.8xUL\n",