From b2be94f559c7639e93ad9ee02d3a2a0cdafa0172 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 22 Apr 2022 18:10:36 -0600 Subject: [PATCH] LU-12998 mds: add no_create parameter to stop creates Add a target tunable parameter and mount option "no_create" to disable new *directory* creation on an MDT. This sends the flag OS_STATFS_NOCREATE to the clients, and the DNE MDT space balance will avoid selecting that MDT when creating a new subdirectory, without disabling access to existing files/dirs. This allows "soft disabling" an MDT in advance of storage upgrades to minimize new directories and files created on that MDT, reduce future migration, and/or backup/restore workload. As yet it does not totally disable *file* creation on the MDT, but it may be extended to do so in the future. This is analogous to the "no_precreate" option that was added on the OSTs, and "no_create" has been added to the OSTs for consistency ("no_precreate" is kept for compatibility for now). lod_declare_create() checks whether the directory create target MDT is the current MDT; a mismatch may happen if nocreate is set on some MDT. Upon such a mismatch, call dt_statfs() to fetch the latest statfs to know whether nocreate is set. lmv_create() will choose another MDT if the target MDT is set with nocreate, but in case the flag is cleared, call obd_statfs() to fetch the cached statfs and check again. 
Lustre-change: https://review.whamcloud.com/47124 Lustre-commit: 1dbcd0bab881fac38d8a5e4ef1559f12618f8f0e Lustre-change: https://review.whamcloud.com/53437 Lustre-commit: 066262a04cb8e0cbf49a20b7bf036d4484399afe (TBD) Test-Parameters: testlist=conf-sanity env=ONLY=112b,ONLY_REPEAT=50 Signed-off-by: Andreas Dilger Signed-off-by: Lai Siyao Change-Id: I53cfb48ade2f844b18bfc630e7fcea6de9ce7057 Reviewed-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53189 Tested-by: jenkins Tested-by: Maloo --- contrib/scripts/spelling.txt | 3 +- lustre/include/lu_target.h | 3 +- lustre/include/lustre_disk.h | 4 +- lustre/include/obd.h | 2 +- lustre/include/uapi/linux/lustre/lustre_user.h | 5 +- lustre/lmv/lmv_obd.c | 71 ++++++++++++++++------ lustre/lod/lod_object.c | 82 +++++++++++++------------- lustre/lod/lod_qos.c | 10 +++- lustre/mdt/mdt_handler.c | 4 ++ lustre/mdt/mdt_lproc.c | 50 ++++++++++++++++ lustre/obdclass/lu_tgt_descs.c | 2 +- lustre/obdclass/obd_mount.c | 6 +- lustre/ofd/lproc_ofd.c | 54 ++++++++++------- lustre/ofd/ofd_dev.c | 6 +- lustre/ofd/ofd_internal.h | 1 - lustre/ofd/ofd_obd.c | 8 +-- lustre/osp/osp_precreate.c | 2 +- lustre/ptlrpc/wiretest.c | 4 +- lustre/tests/conf-sanity.sh | 78 +++++++++++++++++++++--- lustre/tests/test-framework.sh | 10 +++- lustre/utils/lfs.c | 2 +- lustre/utils/wirecheck.c | 2 +- lustre/utils/wiretest.c | 4 +- 23 files changed, 293 insertions(+), 120 deletions(-) diff --git a/contrib/scripts/spelling.txt b/contrib/scripts/spelling.txt index 2441de9..069da83 100644 --- a/contrib/scripts/spelling.txt +++ b/contrib/scripts/spelling.txt @@ -192,11 +192,12 @@ mti_xattr_buf||mdi_xattr_buf nla_strlcpy|nla_strscpy OS_STATE_DEGRADED||OS_STATFS_DEGRADED OS_STATE_READONLY||OS_STATFS_READONLY -OS_STATE_NOPRECREATE||OS_STATFS_NOPRECREATE +OS_STATE_NOPRECREATE||OS_STATFS_NOCREATE OS_STATE_ENOSPC||OS_STATFS_ENOSPC OS_STATE_ENOINO||OS_STATFS_ENOINO OS_STATE_SUM||OS_STATFS_SUM OS_STATE_NONROT||OS_STATFS_NONROT 
+OS_STATFS_NOPRECREATE||OS_STATFS_NOCREATE page_cache_get||get_page PAGE_CACHE_MASK||PAGE_MASK page_cache_release||put_page diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 39009e4..273f115 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -174,7 +174,8 @@ struct lu_target { lut_no_reconstruct:1, /* enforce recovery for local clients */ lut_local_recovery:1, - lut_cksum_t10pi_enforce:1; + lut_cksum_t10pi_enforce:1, + lut_no_create:1; /* checksum types supported on this node */ enum cksum_types lut_cksum_types_supported; /** last_rcvd file */ diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index ad18b93..b1f5ccc 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -88,7 +88,6 @@ struct lustre_mount_data { char *lmd_nidnet; /* network to restrict this client to */ }; -#define LMD_FLG_SERVER 0x0001 /* Mounting a server */ #define LMD_FLG_CLIENT 0x0002 /* Mounting a client */ #define LMD_FLG_SKIP_LFSCK 0x0004 /* NOT auto resume LFSCK when mount */ #define LMD_FLG_ABORT_RECOV 0x0008 /* Abort recovery */ @@ -100,13 +99,12 @@ struct lustre_mount_data { #define LMD_FLG_NOIR 0x0080 /* NO imperative recovery */ #define LMD_FLG_NOSCRUB 0x0100 /* Do not trigger scrub automatically */ #define LMD_FLG_MGS 0x0200 /* Also start MGS along with server */ -#define LMD_FLG_IAM 0x0400 /* IAM dir */ #define LMD_FLG_NO_PRIMNODE 0x0800 /* all nodes are service nodes */ #define LMD_FLG_VIRGIN 0x1000 /* the service registers first time */ #define LMD_FLG_UPDATE 0x2000 /* update parameters */ #define LMD_FLG_HSM 0x4000 /* Start coordinator */ #define LMD_FLG_DEV_RDONLY 0x8000 /* discard modification quitely */ -#define LMD_FLG_NO_PRECREATE 0x10000 /* do not allow OST object creation */ +#define LMD_FLG_NO_CREATE 0x10000 /* do not allow OST object creation */ #define LMD_FLG_LOCAL_RECOV 0x20000 /* force recovery for local clients */ #define LMD_FLG_ABORT_RECOV_MDT 0x40000 /* Abort recovery 
between MDTs */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h index b7bd835..0ddf8e8 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -451,7 +451,7 @@ struct lmv_obd { struct kobject *lmv_tgts_kobj; void *lmv_cache; - __u32 lmv_qos_rr_index; + __u32 lmv_qos_rr_index; /* next round-robin MDT idx */ }; #define lmv_mdt_count lmv_mdt_descs.ltd_lmv_desc.ld_tgt_count diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 9d8b626..e667dd4 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -263,7 +263,7 @@ typedef struct statx lstatx_t; enum obd_statfs_state { OS_STATFS_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ OS_STATFS_READONLY = 0x00000002, /**< filesystem is read-only */ - OS_STATFS_NOPRECREATE = 0x00000004, /**< no object precreation */ + OS_STATFS_NOCREATE = 0x00000004, /**< no object creation */ OS_STATFS_UNUSED1 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */ OS_STATFS_UNUSED2 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */ OS_STATFS_ENOSPC = 0x00000020, /**< not enough free space */ @@ -271,6 +271,9 @@ enum obd_statfs_state { OS_STATFS_SUM = 0x00000100, /**< aggregated for all tagrets */ OS_STATFS_NONROT = 0x00000200, /**< non-rotational device */ }; +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) +#define OS_STATFS_NOPRECREATE OS_STATFS_NOCREATE +#endif /** filesystem statistics/attributes for target device */ struct obd_statfs { diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index d6e0f13..6f36d42 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -566,6 +566,16 @@ static int lmv_disconnect(struct obd_export *exp) RETURN(rc); } +static void lmv_statfs_update(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt, + struct obd_statfs *osfs) +{ + spin_lock(&lmv->lmv_lock); + tgt->ltd_statfs = *osfs; + tgt->ltd_statfs_age = ktime_get_seconds(); + spin_unlock(&lmv->lmv_lock); + 
set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); +} + static int lmv_fid2path(struct obd_export *exp, int len, void *karg, void __user *uarg) { @@ -894,9 +904,9 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, 0); if (rc) RETURN(rc); + lmv_statfs_update(lmv, tgt, &stat_buf); if (copy_to_user(data->ioc_pbuf1, &stat_buf, - min((int) data->ioc_plen1, - (int) sizeof(stat_buf)))) + min_t(int, data->ioc_plen1, sizeof(stat_buf)))) RETURN(-EFAULT); break; } @@ -1346,7 +1356,7 @@ out_free_temp: return rc; } -static int lmv_statfs_update(void *cookie, int rc) +static int lmv_statfs_cb(void *cookie, int rc) { struct obd_info *oinfo = cookie; struct obd_device *obd = oinfo->oi_obd; @@ -1358,13 +1368,8 @@ static int lmv_statfs_update(void *cookie, int rc) * NB: don't deactivate TGT upon error, because we may not trigger async * statfs any longer, then there is no chance to activate TGT. */ - if (!rc) { - spin_lock(&lmv->lmv_lock); - tgt->ltd_statfs = *osfs; - tgt->ltd_statfs_age = ktime_get_seconds(); - spin_unlock(&lmv->lmv_lock); - set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); - } + if (!rc) + lmv_statfs_update(lmv, tgt, osfs); return rc; } @@ -1375,7 +1380,7 @@ int lmv_statfs_check_update(struct obd_device *obd, struct lmv_tgt_desc *tgt) struct obd_info oinfo = { .oi_obd = obd, .oi_tgt = tgt, - .oi_cb_up = lmv_statfs_update, + .oi_cb_up = lmv_statfs_cb, }; int rc; @@ -1538,7 +1543,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, GOTO(unlock, tgt = ERR_PTR(rc)); lmv_foreach_tgt(lmv, tgt) { - if (!tgt->ltd_exp || !tgt->ltd_active) { + if (!tgt->ltd_exp || !tgt->ltd_active || + (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) { tgt->ltd_qos.ltq_usable = 0; continue; } @@ -1555,7 +1561,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, total_usable++; } - /* If current MDT has above-average space and dir is not aleady using + /* If current MDT has above-average space and dir is not already using * round-robin to spread 
across more MDTs, stay on the parent MDT * to avoid creating needless remote MDT directories. Remote dirs * close to the root balance space more effectively than bottom dirs, @@ -1610,7 +1616,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv) index = (i + lmv->lmv_qos_rr_index) % lmv->lmv_mdt_descs.ltd_tgts_size; tgt = lmv_tgt(lmv, index); - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active || + (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) continue; lmv->lmv_qos_rr_index = (tgt->ltd_index + 1) % @@ -1643,7 +1650,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_lf(struct lmv_obd *lmv) GOTO(unlock, tgt = ERR_PTR(-EAGAIN)); lmv_foreach_tgt(lmv, tgt) { - if (!tgt->ltd_exp || !tgt->ltd_active) { + if (!tgt->ltd_exp || !tgt->ltd_active || + (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) { tgt->ltd_qos.ltq_usable = 0; continue; } @@ -1734,7 +1742,7 @@ lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_object *lso, * which is set outside, and if dir is migrating, 'op_data->op_new_layout' * indicates whether old or new layout is used to locate. * - * For plain direcotry, it just locate the MDT of op_data->op_fid1. + * For plain directory, it just locate the MDT of op_data->op_fid1. * * \param[in] lmv LMV device * \param[in/out] op_data client MD stack parameters, name, namelen etc, @@ -1950,7 +1958,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv, if (tgt == ERR_PTR(-EAGAIN)) { if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) && !lmv_op_default_rr_mkdir(op_data) && - !lmv_op_user_qos_mkdir(op_data)) + !lmv_op_user_qos_mkdir(op_data) && + !(tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE)) /* if not necessary, don't create remote directory. 
*/ tgt = tmp; else @@ -1962,9 +1971,26 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv, if (!IS_ERR(tgt)) op_data->op_mds = tgt->ltd_index; + /* If space balance was called because the original target was marked + * NOCREATE, periodically check whether the state has changed. + */ + if (tmp != tgt && tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE) + lmv_statfs_check_update(lmv2obd_dev(lmv), tmp); + return tgt; } +static bool lmv_tgt_nocreate(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt) +{ + if (likely(!(tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE))) + return false; + + obd_statfs(NULL, tgt->ltd_exp, &tgt->ltd_statfs, + ktime_get_seconds() - + lmv->lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage, 0); + return tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE; +} + int lmv_create(struct obd_export *exp, struct md_op_data *op_data, const void *data, size_t datalen, umode_t mode, uid_t uid, gid_t gid, kernel_cap_t cap_effective, __u64 rdev, @@ -2005,6 +2031,9 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, * 2. is "lfs mkdir -i -1"? mkdir by space usage. * 3. is starting MDT specified in default LMV? mkdir on MDT N. * 4. is default LMV space balanced? mkdir by space usage. + * + * If the existing parent or specific MDT selected is deactivated + * with OS_STATFS_NOCREATE then select a different MDT by QOS. 
*/ if (lmv_op_user_specific_mkdir(op_data)) { struct lmv_user_md *lum = op_data->op_data; @@ -2013,6 +2042,8 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, tgt = lmv_tgt(lmv, op_data->op_mds); if (!tgt) RETURN(-ENODEV); + if (unlikely(lmv_tgt_nocreate(lmv, tgt))) + GOTO(new_tgt, -EAGAIN); } else if (lmv_op_user_qos_mkdir(op_data)) { tgt = lmv_locate_tgt_by_space(lmv, op_data, tgt); if (IS_ERR(tgt)) @@ -2024,7 +2055,11 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, tgt = lmv_tgt(lmv, op_data->op_mds); if (!tgt) RETURN(-ENODEV); - } else if (lmv_op_default_qos_mkdir(op_data)) { + if (unlikely(lmv_tgt_nocreate(lmv, tgt))) + GOTO(new_tgt, -EAGAIN); + } else if (lmv_op_default_qos_mkdir(op_data) || + unlikely(lmv_tgt_nocreate(lmv, tgt))) { +new_tgt: tgt = lmv_locate_tgt_by_space(lmv, op_data, tgt); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index d9ab18f..6932bbf 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -2142,6 +2142,9 @@ static int lod_mdt_alloc_specific(const struct lu_env *env, /* this OSP doesn't feel well */ continue; + if (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE) + continue; + rc = dt_fid_alloc(env, tgt_dt, &fid, NULL, NULL); if (rc < 0) continue; @@ -5912,49 +5915,42 @@ static int lod_declare_create(const struct lu_env *env, struct dt_object *dt, struct lu_buf buf = { NULL }; struct lu_buf *lmu = NULL; - ss = lu_site2seq(dt->do_lu.lo_dev->ld_site); + if (hint && !hint->dah_eadata && + CFS_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT)) { + GOTO(out, rc = -EREMOTE); + } else if (hint && hint->dah_eadata) { + buf.lb_buf = (void *)hint->dah_eadata; + buf.lb_len = hint->dah_eadata_len; + lmu = &buf; + } - /* If the parent has default stripeEA, and client - * did not find it before sending create request, - * then MDT will return -EREMOTE, and client will - * retrieve the default stripeEA and re-create the - * sub directory. 
- * - * Note: if dah_eadata != NULL, it means creating the - * striped directory with specified stripeEA, then it - * should ignore the default stripeEA */ - if (hint != NULL && hint->dah_eadata == NULL) { - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT)) - GOTO(out, rc = -EREMOTE); - - if (lo->ldo_dir_stripe_offset != LMV_OFFSET_DEFAULT && - lo->ldo_dir_stripe_offset != ss->ss_node_id) { - struct lod_device *lod; - struct lu_tgt_desc *mdt = NULL; - bool found_mdt = false; - - lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev); - lod_foreach_mdt(lod, mdt) { - if (mdt->ltd_index == - lo->ldo_dir_stripe_offset) { - found_mdt = true; - break; - } + /* if dir target MDT is not current MDT, it's possible that + * directory creation is disabled on the target MDT. + */ + ss = lu_site2seq(dt->do_lu.lo_dev->ld_site); + if (lo->ldo_dir_stripe_offset != LMV_OFFSET_DEFAULT && + lo->ldo_dir_stripe_offset != ss->ss_node_id) { + struct lod_device *lod; + struct lu_tgt_desc *mdt; + bool no_create = false; + + lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev); + rc = -EINVAL; + lod_foreach_mdt(lod, mdt) { + if (mdt->ltd_index == + lo->ldo_dir_stripe_offset) { + rc = -EPROTO; + /* refresh statfs */ + dt_statfs(env, mdt->ltd_tgt, + &mdt->ltd_statfs); + no_create = (mdt->ltd_statfs.os_state & + OS_STATFS_NOCREATE); + break; } - - /* If the MDT indicated by stripe_offset can be - * found, then tell client to resend the create - * request to the correct MDT, otherwise return - * error to client */ - if (found_mdt) - GOTO(out, rc = -EREMOTE); - else - GOTO(out, rc = -EINVAL); } - } else if (hint && hint->dah_eadata) { - lmu = &buf; - lmu->lb_buf = (void *)hint->dah_eadata; - lmu->lb_len = hint->dah_eadata_len; + + if (!no_create) + GOTO(out, rc); } rc = lod_declare_dir_striping_create(env, dt, attr, lmu, dof, @@ -6708,9 +6704,11 @@ static bool lod_sel_osts_allowed(const struct lu_env *env, if (sfs->os_state & OS_STATFS_ENOSPC || sfs->os_state & OS_STATFS_READONLY || + sfs->os_state & 
OS_STATFS_NOCREATE || sfs->os_state & OS_STATFS_DEGRADED) { - CDEBUG(D_LAYOUT, "ost %d is not availble for SEL " - "extension, state %u\n", index, sfs->os_state); + CDEBUG(D_LAYOUT, + "OST%04x unusable for SEL extension, state %x\n", + index, sfs->os_state); ret = false; break; } diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 3f1f477..a202441 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -76,8 +76,8 @@ static inline int lod_statfs_check(struct lu_tgt_descs *ltd, if (sfs->os_state & OS_STATFS_READONLY) return -EROFS; - /* object precreation is skipped on targets with max_create_count=0 */ - if (sfs->os_state & OS_STATFS_NOPRECREATE) + /* object creation is skipped on the OST with max_create_count=0 */ + if (!ltd->ltd_is_mdt && sfs->os_state & OS_STATFS_NOCREATE) return -ENOBUFS; return 0; @@ -1014,6 +1014,9 @@ repeat_find: if (lod_statfs_check(ltd, mdt)) continue; + if (mdt->ltd_statfs.os_state & OS_STATFS_NOCREATE) + continue; + /* try to use another OSP if this one is degraded */ if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED && !use_degraded) { @@ -1874,7 +1877,8 @@ int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo, if (mdt->ltd_discon || lod_statfs_check(ltd, mdt)) continue; - if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED) + if (mdt->ltd_statfs.os_state & + (OS_STATFS_DEGRADED | OS_STATFS_NOCREATE)) continue; mdt->ltd_qos.ltq_usable = 1; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index de86d74..afc256f 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -553,6 +553,8 @@ static int mdt_statfs(struct tgt_session_info *tsi) osfs->os_bsize - 1) >> tgd->tgd_blockbits)); tgt_grant_sanity_check(mdt->mdt_lu_dev.ld_obd, __func__); + if (mdt->mdt_lut.lut_no_create) + osfs->os_state |= OS_STATFS_NOCREATE; CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; " "%llu objects: %llu free; state %x\n", osfs->os_blocks, osfs->os_bfree, osfs->os_bavail, @@ -6037,6 +6039,8 @@ 
static int mdt_init0(const struct lu_env *env, struct mdt_device *m, obd->u.obt.obt_magic = OBT_MAGIC; if (lsi->lsi_lmd->lmd_flags & LMD_FLG_SKIP_LFSCK) m->mdt_skip_lfsck = 1; + if (lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_CREATE) + m->mdt_lut.lut_no_create = 1; } /* Just try to get a DoM lock by default. Otherwise, having a group diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 0660b6a..5ee90b1 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -771,6 +771,55 @@ MDT_BOOL_RW_ATTR(enable_strict_som); MDT_BOOL_RW_ATTR(enable_dmv_xattr); /** + * Show if the MDT is in no create mode. + * + * This means MDT has been adminstratively disabled to prevent it + * from creating any new directories on the MDT, though existing files + * and directories can still be read, written, and unlinked. + * + * \retval number of bytes written + */ +static ssize_t no_create_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", mdt->mdt_lut.lut_no_create); +} + +/** + * Set MDT to no create mode. + * + * This is used to interface to userspace administrative tools to + * disable new directory creation on the MDT. + * + * \param[in] count \a buffer length + * + * \retval \a count on success + * \retval negative number on error + */ +static ssize_t no_create_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + bool val; + int rc; + + rc = kstrtobool(buffer, &val); + if (rc) + return rc; + + mdt->mdt_lut.lut_no_create = val; + + return count; +} +LUSTRE_RW_ATTR(no_create); + +/** * Show MDT async commit count. 
* * @m seq_file handle @@ -1303,6 +1352,7 @@ static struct attribute *mdt_attrs[] = { &lustre_attr_enable_striped_dir.attr, &lustre_attr_commit_on_sharing.attr, &lustre_attr_local_recovery.attr, + &lustre_attr_no_create.attr, &lustre_attr_async_commit_count.attr, &lustre_attr_sync_count.attr, &lustre_attr_dom_lock.attr, diff --git a/lustre/obdclass/lu_tgt_descs.c b/lustre/obdclass/lu_tgt_descs.c index 5bd9d3a..59accbb 100644 --- a/lustre/obdclass/lu_tgt_descs.c +++ b/lustre/obdclass/lu_tgt_descs.c @@ -429,7 +429,7 @@ EXPORT_SYMBOL(ltd_del_tgt); * Calculate penalties per-tgt and per-server * * Re-calculate penalties when the configuration changes, active targets - * change and after statfs refresh (all these are reflected by lq_dirty flag). + * change and after statfs refresh (all these are reflected by LQ_DIRTY flag). * On every tgt and server: decay the penalty by half for every 8x the update * interval that the device has been idle. That gives lots of time for the * statfs information to be updated (which the penalty is only a proxy for), diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 4412638..d0c6316 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1361,8 +1361,10 @@ int lmd_parse(char *options, struct lustre_mount_data *lmd) max_t(int, simple_strtoul(s1 + 19, NULL, 10), time_min); clear++; - } else if (strncmp(s1, "no_precreate", 12) == 0) { - lmd->lmd_flags |= LMD_FLG_NO_PRECREATE; + } else if (strncmp(s1, "no_create", 9) == 0 || + /* no_precreate kept for 2.16 compatibility */ + strncmp(s1, "no_precreate", 12) == 0) { + lmd->lmd_flags |= LMD_FLG_NO_CREATE; clear++; } else if (strncmp(s1, "noir", 4) == 0) { lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. 
*/ diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index 750d877..0940c1c 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -298,18 +298,18 @@ LUSTRE_RW_ATTR(degraded); * * \retval number of bytes written */ -static ssize_t no_precreate_show(struct kobject *kobj, struct attribute *attr, +static ssize_t no_create_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct obd_device *obd = container_of(kobj, struct obd_device, obd_kset.kobj); struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); - return snprintf(buf, PAGE_SIZE, "%u\n", ofd->ofd_no_precreate); + return scnprintf(buf, PAGE_SIZE, "%u\n", ofd->ofd_lut.lut_no_create); } /** - * Set OFD to no precreate mode. + * Set OFD to no create mode. * * This is used to interface to userspace administrative tools to * disable new object creation on the OST. @@ -319,7 +319,7 @@ static ssize_t no_precreate_show(struct kobject *kobj, struct attribute *attr, * \retval \a count on success * \retval negative number on error */ -static ssize_t no_precreate_store(struct kobject *kobj, struct attribute *attr, +static ssize_t no_create_store(struct kobject *kobj, struct attribute *attr, const char *buffer, size_t count) { struct obd_device *obd = container_of(kobj, struct obd_device, @@ -333,12 +333,19 @@ static ssize_t no_precreate_store(struct kobject *kobj, struct attribute *attr, return rc; spin_lock(&ofd->ofd_flags_lock); - ofd->ofd_no_precreate = val; + ofd->ofd_lut.lut_no_create = val; spin_unlock(&ofd->ofd_flags_lock); return count; } +LUSTRE_RW_ATTR(no_create); + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) +/* compatibility entry for a few releases */ +#define no_precreate_show no_create_show +#define no_precreate_store no_create_store LUSTRE_RW_ATTR(no_precreate); +#endif /** * Show OFD filesystem type. 
@@ -1096,32 +1103,35 @@ void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset, LPROC_SEQ_FOPS(lprocfs_nid_stats_clear); static struct attribute *ofd_attrs[] = { - &lustre_attr_tot_dirty.attr, - &lustre_attr_tot_granted.attr, - &lustre_attr_tot_pending.attr, + &lustre_attr_access_log_mask.attr, + &lustre_attr_access_log_size.attr, + &lustre_attr_atime_diff.attr, + &lustre_attr_checksum_t10pi_enforce.attr, + &lustre_attr_degraded.attr, + &lustre_attr_fstype.attr, &lustre_attr_grant_compat_disable.attr, + &lustre_attr_grant_precreate.attr, &lustre_attr_instance.attr, - &lustre_attr_recovery_time_hard.attr, - &lustre_attr_recovery_time_soft.attr, &lustre_attr_ir_factor.attr, + &lustre_attr_job_cleanup_interval.attr, + &lustre_attr_lfsck_speed_limit.attr, + &lustre_attr_no_create.attr, +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) + &lustre_attr_no_precreate.attr, +#endif &lustre_attr_num_exports.attr, - &lustre_attr_seqs_allocated.attr, - &lustre_attr_grant_precreate.attr, &lustre_attr_precreate_batch.attr, - &lustre_attr_atime_diff.attr, - &lustre_attr_degraded.attr, - &lustre_attr_fstype.attr, - &lustre_attr_no_precreate.attr, + &lustre_attr_recovery_time_hard.attr, + &lustre_attr_recovery_time_soft.attr, + &lustre_attr_seqs_allocated.attr, + &lustre_attr_tot_dirty.attr, + &lustre_attr_tot_granted.attr, + &lustre_attr_tot_pending.attr, + &lustre_attr_soft_sync_limit.attr, &lustre_attr_sync_journal.attr, #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0) &lustre_attr_sync_on_lock_cancel.attr, #endif - &lustre_attr_soft_sync_limit.attr, - &lustre_attr_lfsck_speed_limit.attr, - &lustre_attr_access_log_mask.attr, - &lustre_attr_access_log_size.attr, - &lustre_attr_job_cleanup_interval.attr, - &lustre_attr_checksum_t10pi_enforce.attr, #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 15, 53, 0) &lustre_attr_read_cache_enable.attr, &lustre_attr_readcache_max_filesize.attr, diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 
dcaa49e..7081caa 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -187,8 +187,8 @@ static int ofd_stack_init(const struct lu_env *env, if (lmd) { if (lmd->lmd_flags & LMD_FLG_SKIP_LFSCK) m->ofd_skip_lfsck = 1; - if (lmd->lmd_flags & LMD_FLG_NO_PRECREATE) - m->ofd_no_precreate = 1; + if (lmd->lmd_flags & LMD_FLG_NO_CREATE) + m->ofd_lut.lut_no_create = 1; *lmd_flags = lmd->lmd_flags; } @@ -1505,7 +1505,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) if (OBD_FAIL_CHECK(OBD_FAIL_OST_EROFS)) RETURN(-EROFS); - if (ofd->ofd_no_precreate) + if (ofd->ofd_lut.lut_no_create) return -EPERM; repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY); diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index bf78e70..508d968 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -144,7 +144,6 @@ struct ofd_device { ofd_lastid_rebuilding:1, ofd_record_fid_accessed:1, ofd_lfsck_verify_pfid:1, - ofd_no_precreate:1, ofd_skip_lfsck:1; struct seq_server_site ofd_seq_site; /* the limit of SOFT_SYNC RPCs that will trigger a soft sync */ diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 556621b..b072f9e 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -761,12 +761,12 @@ int ofd_statfs(const struct lu_env *env, struct obd_export *exp, osfs->os_ffree -= osfs->os_ffree; } - /* OS_STATFS_READONLY can be set by OSD already */ + /* OS_STATFS_READONLY can be set by OSD already, only add flags */ if (ofd->ofd_raid_degraded) osfs->os_state |= OS_STATFS_DEGRADED; - if (ofd->ofd_no_precreate) - osfs->os_state |= OS_STATFS_NOPRECREATE; + if (ofd->ofd_lut.lut_no_create) + osfs->os_state |= OS_STATFS_NOCREATE; if (obd->obd_self_export != exp && !exp_grant_param_supp(exp) && tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) { @@ -1023,7 +1023,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, ENTRY; - if (ofd->ofd_no_precreate) + if (ofd->ofd_lut.lut_no_create) return -EPERM; 
ofd_info_init(env, exp); diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index 4e7034c..4feecaf 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -1108,7 +1108,7 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs) /* Object precreation skipped on OST if manually disabled */ if (d->opd_pre_max_create_count == 0) - msfs->os_state |= OS_STATFS_NOPRECREATE; + msfs->os_state |= OS_STATFS_NOCREATE; /* else don't clear flags in new msfs->os_state sent from OST */ update: diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 4f10a37..3fac4c1 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -2205,8 +2205,8 @@ void lustre_assert_wire_constants(void) (unsigned)OS_STATFS_DEGRADED); LASSERTF(OS_STATFS_READONLY == 0x00000002UL, "found 0x%.8xUL\n", (unsigned)OS_STATFS_READONLY); - LASSERTF(OS_STATFS_NOPRECREATE == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)OS_STATFS_NOPRECREATE); + LASSERTF(OS_STATFS_NOCREATE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)OS_STATFS_NOCREATE); LASSERTF(OS_STATFS_ENOSPC == 0x00000020UL, "found 0x%.8xUL\n", (unsigned)OS_STATFS_ENOSPC); LASSERTF(OS_STATFS_ENOINO == 0x00000040UL, "found 0x%.8xUL\n", diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 68192c9..1ce4994 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -133,12 +133,15 @@ stop_mdt() { } start_mds() { + local mdscount=$MDSCOUNT local num - for num in $(seq $MDSCOUNT); do + [[ "$1" == "--mdscount" ]] && mdscount=$2 && shift 2 + + for ((num=1; num <= $mdscount; num++ )); do start_mdt $num $@ || return 94 done - for num in $(seq $MDSCOUNT); do + for ((num=1; num <= $mdscount; num++ )); do wait_clients_import_state ${CLIENTS:-$HOSTNAME} mds${num} FULL done } @@ -8614,15 +8617,22 @@ test_111() { } run_test 111 "Adding large_dir with over 2GB directory" -test_112() { +test_112a() { + local param="no_create" + + (( 
$OST1_VERSION > $(version_code 2.14.0) )) || + skip "need OSS at least 2.14.0" + (( $OST1_VERSION >= $(version_code 2.14.0.115) )) || + param="no_precreate" + start_mds || error "MDS start failed" start_ost || error "OSS start failed" echo "start ost2 service on $(facet_active_host ost2)" - start ost2 $(ostdevname 2) $(csa_add "$OST_MOUNT_OPTS" -o no_precreate) || + start ost2 $(ostdevname 2) $(csa_add "$OST_MOUNT_OPTS" -o $param) || error "start ost2 facet failed" local val=$(do_facet ost2 \ - "$LCTL get_param -n obdfilter.$FSNAME-OST0001*.no_precreate") - (( $val == 1 )) || error "obdfilter.$FSNAME-OST0001*.no_precreate=$val" + "$LCTL get_param -n obdfilter.$FSNAME-OST0001*.$param") + (( $val == 1 )) || error "obdfilter.$FSNAME-OST0001*.$param=$val" mount_client $MOUNT || error "mount client failed" wait_osc_import_state mds1 ost1 FULL @@ -8634,7 +8644,11 @@ test_112() { $LFS setstripe -i 1 $DIR/$tfile.1 && $LFS getstripe $DIR/$tfile.1 && (( $($LFS getstripe -i $DIR/$tfile.1) == 1 )) && error "allowed to create $tfile.1 on OST0001" - do_facet ost2 $LCTL set_param obdfilter.*.no_precreate=0 + $LFS df -v $MOUNT + $LFS df -v $MOUNT | grep -q "OST:1.*N" || + error "NOCREATE not in 'lfs df'" + + do_facet ost2 $LCTL set_param obdfilter.$FSNAME-OST0001*.$param=0 sleep_maxage $LFS setstripe -i 1 $DIR/$tfile.2 || error "failed to create $tfile.2 on ost1 facet" @@ -8643,7 +8657,55 @@ test_112() { stop_ost2 || error "stop ost2 facet failed" cleanup } -run_test 112 "mount OST with nocreate option" +run_test 112a "mount OST with no_create option" + +test_112b() { + (( MDSCOUNT >= 2 )) || skip "need at least 2 MDTs" + (( $MDS1_VERSION >= $(version_code 2.14.0.115) )) || + skip "need MDS >= 2.14.0.115" + local mdsnum=$MDSCOUNT + local facet=mds$mdsnum + local mdtidx=$((mdsnum - 1)) + local mdtname=$FSNAME-MDT$(printf %04x $mdtidx) + + start_mds --mdscount $((mdsnum - 1)) || error "MDS start failed" + start_mdt $mdsnum -o no_create || error "start $facet failed" + local 
val=$(do_facet $facet \ + "$LCTL get_param -n mdt.$mdtname*.no_create") + (( $val == 1 )) || error "mdt.$mdtname*.no_create=$val" + start_ost || error "ost1 start failed" + start_ost2 || error "ost1 start failed" + + mount_client $MOUNT || error "mount client failed" + wait_osc_import_ready $facet ost2 + + $LFS df -v $MOUNT + $LFS df -v $MOUNT | grep -q "MDT:$mdtidx.*N" || + error "NOCREATE not in 'lfs df'" + + $LFS mkdir -i $mdtidx $DIR/$tdir || + $LFS setdirstripe -D -c 1 -i -1 --max-inherit-rr 2 $DIR/$tdir || + error "error creating $tdir on $mdtname" + stack_trap "rm -rf $DIR/$tdir" + + mkdir $DIR/$tdir/d1.{1..100} || error "mkdir $tdir/d1.{1..100} failed" + $LFS getdirstripe -i $DIR/$tdir/d1.* | sort | uniq -c + do_facet $facet $LCTL set_param mdt.$mdtname*.no_create=0 + # allow one initial create for delayed statfs on client + (( $($LFS getdirstripe -i $DIR/$tdir/d1.* | grep -c $mdtidx) < 2 )) || + error "allowed create on $mdtname" + sleep_maxage_lmv + + mkdir $DIR/$tdir/d2.{1..100} || error "mkdir $tdir/d2.{1..100} failed" + $LFS getdirstripe -i $DIR/$tdir/d2.{1..100} | sort | uniq -c + (( $($LFS getdirstripe -i $DIR/$tdir/d2.* | grep -c $mdtidx) > 10 )) || + error "no create on $mdtname" + # files not cleaned with ONLY_REPEAT because of client unmount below + rm -r $DIR/$tdir + stop_ost2 || error "ost1 start failed" + cleanup +} +run_test 112b "mount MDT with no_create option" # Global for 113 SAVE_MGS_MOUNT_OPTS=$MGS_MOUNT_OPTS diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 1b40b64..37d08b8 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -11227,8 +11227,14 @@ rmultiop_stop() { } sleep_maxage() { - local delay=$(do_facet $SINGLEMDS lctl get_param -n lo[vd].*.qos_maxage | - awk '{ print $1 * 2; exit; }') + local delay=$(do_facet mds1 lctl get_param -n lod.*.qos_maxage | + awk '{ print $1 + 5; exit; }') + sleep $delay +} + +sleep_maxage_lmv() { + local delay=$(lctl get_param -n 
lmv.*.qos_maxage | + awk '{ print $1 + 5; exit; }') sleep $delay } diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 9c6be34..e06359e 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -6769,7 +6769,7 @@ static struct obd_statfs_state_names { } oss_names[] = { { .osn_state = OS_STATFS_DEGRADED, .osn_name = 'D', .osn_err = true }, { .osn_state = OS_STATFS_READONLY, .osn_name = 'R', .osn_err = true }, - { .osn_state = OS_STATFS_NOPRECREATE,.osn_name = 'N', .osn_err = true }, + { .osn_state = OS_STATFS_NOCREATE, .osn_name = 'N', .osn_err = true }, { .osn_state = OS_STATFS_UNUSED1, .osn_name = '?', .osn_err = true }, { .osn_state = OS_STATFS_UNUSED2, .osn_name = '?', .osn_err = true }, { .osn_state = OS_STATFS_ENOSPC, .osn_name = 'S', .osn_err = true }, diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 7d2a380..f7cc11b 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -1044,7 +1044,7 @@ check_obd_statfs(void) CHECK_VALUE_X(OS_STATFS_DEGRADED); CHECK_VALUE_X(OS_STATFS_READONLY); - CHECK_VALUE_X(OS_STATFS_NOPRECREATE); + CHECK_VALUE_X(OS_STATFS_NOCREATE); CHECK_VALUE_X(OS_STATFS_ENOSPC); CHECK_VALUE_X(OS_STATFS_ENOINO); CHECK_VALUE_X(OS_STATFS_SUM); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index a987dcd..ea9505c 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -2241,8 +2241,8 @@ void lustre_assert_wire_constants(void) (unsigned)OS_STATFS_DEGRADED); LASSERTF(OS_STATFS_READONLY == 0x00000002UL, "found 0x%.8xUL\n", (unsigned)OS_STATFS_READONLY); - LASSERTF(OS_STATFS_NOPRECREATE == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)OS_STATFS_NOPRECREATE); + LASSERTF(OS_STATFS_NOCREATE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)OS_STATFS_NOCREATE); LASSERTF(OS_STATFS_ENOSPC == 0x00000020UL, "found 0x%.8xUL\n", (unsigned)OS_STATFS_ENOSPC); LASSERTF(OS_STATFS_ENOINO == 0x00000040UL, "found 0x%.8xUL\n", -- 1.8.3.1