From 1dbcd0bab881fac38d8a5e4ef1559f12618f8f0e Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 22 Apr 2022 18:10:36 -0600 Subject: [PATCH 1/1] LU-12998 mds: add no_create parameter to stop creates Add a target tunable parameter and mount option "no_create" to disable new *directory* creation on an MDT. This sends the flag OS_STATFS_NOCREATE to the clients, and the DNE MDT space balance will avoid selecting that MDT when creating a new subdirectory, without disabling access to existing files/dirs. This allows "soft disabling" an MDT in advance of storage upgrades to minimize new directories and files created on that MDT, reduce future migration, and/or backup/restore workload. As yet it does not totally disable *file* creation on the MDT, but it may be extended to do so in the future. This is analogous to the "no_precreate" option that was added on the OSTs, and "no_create" has been added to the OSTs for consistency ("no_precreate" is kept for compatibility for now). Test-Parameters: testlist=conf-sanity env=ONLY=112b,ONLY_REPEAT=50 Signed-off-by: Andreas Dilger Signed-off-by: Lai Siyao Change-Id: I53cfb48ade2f844b18bfc630e7fcea6de9ce7057 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47124 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin --- contrib/scripts/spelling.txt | 3 +- lustre/include/lu_target.h | 3 +- lustre/include/lustre_disk.h | 6 +- lustre/include/obd.h | 2 +- lustre/include/uapi/linux/lustre/lustre_user.h | 5 +- lustre/lmv/lmv_obd.c | 60 ++++++++++++++------ lustre/lod/lod_object.c | 9 ++- lustre/lod/lod_qos.c | 10 +++- lustre/mdt/mdt_handler.c | 4 ++ lustre/mdt/mdt_lproc.c | 50 +++++++++++++++++ lustre/obdclass/lu_tgt_descs.c | 2 +- lustre/obdclass/obd_mount.c | 6 +- lustre/ofd/lproc_ofd.c | 58 +++++++++++-------- lustre/ofd/ofd_dev.c | 6 +- lustre/ofd/ofd_internal.h | 1 - lustre/ofd/ofd_obd.c | 8 +-- lustre/osp/osp_precreate.c | 2 +- lustre/ptlrpc/wiretest.c | 4 +- 
lustre/tests/conf-sanity.sh | 78 +++++++++++++++++++++++--- lustre/tests/test-framework.sh | 10 +++- lustre/utils/lfs.c | 2 +- lustre/utils/wirecheck.c | 2 +- lustre/utils/wiretest.c | 4 +- 23 files changed, 252 insertions(+), 83 deletions(-) diff --git a/contrib/scripts/spelling.txt b/contrib/scripts/spelling.txt index 5dee6180..b4b0a41 100644 --- a/contrib/scripts/spelling.txt +++ b/contrib/scripts/spelling.txt @@ -113,11 +113,12 @@ OBD_FAIL_TIMEOUT_ORSET||CFS_FAIL_TIMEOUT_ORSET OBD_RACE||CFS_RACE OS_STATE_DEGRADED||OS_STATFS_DEGRADED OS_STATE_READONLY||OS_STATFS_READONLY -OS_STATE_NOPRECREATE||OS_STATFS_NOPRECREATE +OS_STATE_NOPRECREATE||OS_STATFS_NOCREATE OS_STATE_ENOSPC||OS_STATFS_ENOSPC OS_STATE_ENOINO||OS_STATFS_ENOINO OS_STATE_SUM||OS_STATFS_SUM OS_STATE_NONROT||OS_STATFS_NONROT +OS_STATFS_NOPRECREATE||OS_STATFS_NOCREATE page_cache_get||get_page PAGE_CACHE_MASK||PAGE_MASK page_cache_release||put_page diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 95f1201..a06f322 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -174,7 +174,8 @@ struct lu_target { lut_no_reconstruct:1, /* enforce recovery for local clients */ lut_local_recovery:1, - lut_cksum_t10pi_enforce:1; + lut_cksum_t10pi_enforce:1, + lut_no_create:1; /* checksum types supported on this node */ enum cksum_types lut_cksum_types_supported; /** last_rcvd file */ diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 99aff0d..d504bd0 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -83,7 +83,6 @@ #define LMD_PARAMS_MAXLEN 4096 enum lmd_flags { - LMD_FLG_SERVER = 0, /* Mounting a server */ LMD_FLG_CLIENT, /* Mounting a client */ LMD_FLG_SKIP_LFSCK, /* NOT auto resume LFSCK when mount */ LMD_FLG_ABORT_RECOV, /* Abort recovery */ @@ -95,15 +94,14 @@ enum lmd_flags { */ LMD_FLG_WRITECONF, /* Rewrite config log */ LMD_FLG_NOIR, /* NO imperative recovery */ - LMD_FLG_NOSCRUB, /* Do not trigger scrub 
automatically */ + LMD_FLG_NOSCRUB, /* Do not trigger scrub automatically */ LMD_FLG_MGS, /* Also start MGS along with server */ - LMD_FLG_IAM, /* IAM dir */ LMD_FLG_NO_PRIMNODE, /* all nodes are service nodes */ LMD_FLG_VIRGIN, /* the service registers first time */ LMD_FLG_UPDATE, /* update parameters */ LMD_FLG_HSM, /* Start coordinator */ LMD_FLG_DEV_RDONLY, /* discard modification quitely */ - LMD_FLG_NO_PRECREATE, /* do not allow OST object creation */ + LMD_FLG_NO_CREATE, /* prevent MDT/OST object creation */ LMD_FLG_LOCAL_RECOV, /* force recovery for local clients */ LMD_FLG_ABORT_RECOV_MDT, /* Abort recovery between MDTs */ LMD_FLG_NO_LOCAL_LOGS, /* Use config logs from MGS */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h index e5c9e7f..564b391 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -439,7 +439,7 @@ struct lmv_obd { struct kobject *lmv_tgts_kobj; void *lmv_cache; - __u32 lmv_qos_rr_index; + __u32 lmv_qos_rr_index; /* next round-robin MDT idx */ }; #define lmv_mdt_count lmv_mdt_descs.ltd_lmv_desc.ld_tgt_count diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index a4863a6..990e301 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -264,7 +264,7 @@ typedef struct statx lstatx_t; enum obd_statfs_state { OS_STATFS_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ OS_STATFS_READONLY = 0x00000002, /**< filesystem is read-only */ - OS_STATFS_NOPRECREATE = 0x00000004, /**< no object precreation */ + OS_STATFS_NOCREATE = 0x00000004, /**< no object creation */ OS_STATFS_UNUSED1 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */ OS_STATFS_UNUSED2 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */ OS_STATFS_ENOSPC = 0x00000020, /**< not enough free space */ @@ -272,6 +272,9 @@ enum obd_statfs_state { OS_STATFS_SUM = 0x00000100, /**< aggregated for all tagrets */ OS_STATFS_NONROT = 0x00000200, /**< 
non-rotational device */ }; +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) +#define OS_STATFS_NOPRECREATE OS_STATFS_NOCREATE +#endif /** filesystem statistics/attributes for target device */ struct obd_statfs { diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 8a6fb6c..f295f77 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -563,6 +563,16 @@ static int lmv_disconnect(struct obd_export *exp) RETURN(rc); } +static void lmv_statfs_update(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt, + struct obd_statfs *osfs) +{ + spin_lock(&lmv->lmv_lock); + tgt->ltd_statfs = *osfs; + tgt->ltd_statfs_age = ktime_get_seconds(); + spin_unlock(&lmv->lmv_lock); + set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); +} + static int lmv_fid2path(struct obd_export *exp, int len, void *karg, void __user *uarg) { @@ -909,9 +919,9 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, 0); if (rc) RETURN(rc); + lmv_statfs_update(lmv, tgt, &stat_buf); if (copy_to_user(data->ioc_pbuf1, &stat_buf, - min((int) data->ioc_plen1, - (int) sizeof(stat_buf)))) + min_t(int, data->ioc_plen1, sizeof(stat_buf)))) RETURN(-EFAULT); break; } @@ -1363,7 +1373,7 @@ out_free_temp: RETURN(rc); } -static int lmv_statfs_update(void *cookie, int rc) +static int lmv_statfs_cb(void *cookie, int rc) { struct obd_info *oinfo = cookie; struct obd_device *obd = oinfo->oi_obd; @@ -1375,13 +1385,8 @@ static int lmv_statfs_update(void *cookie, int rc) * NB: don't deactivate TGT upon error, because we may not trigger async * statfs any longer, then there is no chance to activate TGT. 
*/ - if (!rc) { - spin_lock(&lmv->lmv_lock); - tgt->ltd_statfs = *osfs; - tgt->ltd_statfs_age = ktime_get_seconds(); - spin_unlock(&lmv->lmv_lock); - set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); - } + if (!rc) + lmv_statfs_update(lmv, tgt, osfs); return rc; } @@ -1392,7 +1397,7 @@ int lmv_statfs_check_update(struct obd_device *obd, struct lmv_tgt_desc *tgt) struct obd_info oinfo = { .oi_obd = obd, .oi_tgt = tgt, - .oi_cb_up = lmv_statfs_update, + .oi_cb_up = lmv_statfs_cb, }; int rc; @@ -1555,7 +1560,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, GOTO(unlock, tgt = ERR_PTR(rc)); lmv_foreach_tgt(lmv, tgt) { - if (!tgt->ltd_exp || !tgt->ltd_active) { + if (!tgt->ltd_exp || !tgt->ltd_active || + (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) { tgt->ltd_qos.ltq_usable = 0; continue; } @@ -1572,7 +1578,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, total_usable++; } - /* If current MDT has above-average space and dir is not aleady using + /* If current MDT has above-average space and dir is not already using * round-robin to spread across more MDTs, stay on the parent MDT * to avoid creating needless remote MDT directories. 
Remote dirs * close to the root balance space more effectively than bottom dirs, @@ -1627,7 +1633,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv) index = (i + lmv->lmv_qos_rr_index) % lmv->lmv_mdt_descs.ltd_tgts_size; tgt = lmv_tgt(lmv, index); - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active || + (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) continue; lmv->lmv_qos_rr_index = (tgt->ltd_index + 1) % @@ -1660,7 +1667,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_lf(struct lmv_obd *lmv) GOTO(unlock, tgt = ERR_PTR(-EAGAIN)); lmv_foreach_tgt(lmv, tgt) { - if (!tgt->ltd_exp || !tgt->ltd_active) { + if (!tgt->ltd_exp || !tgt->ltd_active || + (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) { tgt->ltd_qos.ltq_usable = 0; continue; } @@ -1751,7 +1759,7 @@ lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_object *lso, * which is set outside, and if dir is migrating, 'op_data->op_new_layout' * indicates whether old or new layout is used to locate. * - * For plain direcotry, it just locate the MDT of op_data->op_fid1. + * For plain directory, it just locate the MDT of op_data->op_fid1. * * \param[in] lmv LMV device * \param[in/out] op_data client MD stack parameters, name, namelen etc, @@ -1967,7 +1975,8 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv, if (tgt == ERR_PTR(-EAGAIN)) { if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) && !lmv_op_default_rr_mkdir(op_data) && - !lmv_op_user_qos_mkdir(op_data)) + !lmv_op_user_qos_mkdir(op_data) && + !(tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE)) /* if not necessary, don't create remote directory. */ tgt = tmp; else @@ -1979,6 +1988,12 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv, if (!IS_ERR(tgt)) op_data->op_mds = tgt->ltd_index; + /* If space balance was called because the original target was marked + * NOCREATE, periodically check whether the state has changed. 
+ */ + if (tmp != tgt && tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE) + lmv_statfs_check_update(lmv2obd_dev(lmv), tmp); + return tgt; } @@ -2022,6 +2037,9 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, * 2. is "lfs mkdir -i -1"? mkdir by space usage. * 3. is starting MDT specified in default LMV? mkdir on MDT N. * 4. is default LMV space balanced? mkdir by space usage. + * + * If the existing parent or specific MDT selected is deactivated + * with OS_STATFS_NOCREATE then select a different MDT by QOS. */ if (lmv_op_user_specific_mkdir(op_data)) { struct lmv_user_md *lum = op_data->op_data; @@ -2030,6 +2048,8 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, tgt = lmv_tgt(lmv, op_data->op_mds); if (!tgt) RETURN(-ENODEV); + if (unlikely(tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) + GOTO(new_tgt, -EAGAIN); } else if (lmv_op_user_qos_mkdir(op_data)) { tgt = lmv_locate_tgt_by_space(lmv, op_data, tgt); if (IS_ERR(tgt)) @@ -2041,7 +2061,11 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, tgt = lmv_tgt(lmv, op_data->op_mds); if (!tgt) RETURN(-ENODEV); - } else if (lmv_op_default_qos_mkdir(op_data)) { + if (unlikely(tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) + GOTO(new_tgt, -EAGAIN); + } else if (lmv_op_default_qos_mkdir(op_data) || + tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE) { +new_tgt: tgt = lmv_locate_tgt_by_space(lmv, op_data, tgt); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index a438d4f..203b5a4 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -2091,6 +2091,9 @@ static int lod_mdt_alloc_specific(const struct lu_env *env, /* this OSP doesn't feel well */ continue; + if (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE) + continue; + rc = dt_fid_alloc(env, tgt_dt, &fid, NULL, NULL); if (rc < 0) continue; @@ -7041,9 +7044,11 @@ static bool lod_sel_osts_allowed(const struct lu_env *env, if (sfs->os_state & 
OS_STATFS_ENOSPC || sfs->os_state & OS_STATFS_READONLY || + sfs->os_state & OS_STATFS_NOCREATE || sfs->os_state & OS_STATFS_DEGRADED) { - CDEBUG(D_LAYOUT, "ost %d is not availble for SEL " - "extension, state %u\n", index, sfs->os_state); + CDEBUG(D_LAYOUT, + "OST%04x unusable for SEL extension, state %x\n", + index, sfs->os_state); ret = false; break; } diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 1ad75fb..51f5b63 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -62,8 +62,8 @@ static inline int lod_statfs_check(struct lu_tgt_descs *ltd, if (sfs->os_state & OS_STATFS_READONLY) return -EROFS; - /* object precreation is skipped on targets with max_create_count=0 */ - if (sfs->os_state & OS_STATFS_NOPRECREATE) + /* object creation is skipped on the OST with max_create_count=0 */ + if (!ltd->ltd_is_mdt && sfs->os_state & OS_STATFS_NOCREATE) return -ENOBUFS; return 0; @@ -999,6 +999,9 @@ repeat_find: if (lod_statfs_check(ltd, mdt)) continue; + if (mdt->ltd_statfs.os_state & OS_STATFS_NOCREATE) + continue; + /* try to use another OSP if this one is degraded */ if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED && !use_degraded) { @@ -1858,7 +1861,8 @@ int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo, if (mdt->ltd_discon || lod_statfs_check(ltd, mdt)) continue; - if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED) + if (mdt->ltd_statfs.os_state & + (OS_STATFS_DEGRADED | OS_STATFS_NOCREATE)) continue; mdt->ltd_qos.ltq_usable = 1; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 24b9e81..8547167 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -573,6 +573,8 @@ static int mdt_statfs(struct tgt_session_info *tsi) osfs->os_bsize - 1) >> current_blockbits)); tgt_grant_sanity_check(mdt->mdt_lu_dev.ld_obd, __func__); + if (mdt->mdt_lut.lut_no_create) + osfs->os_state |= OS_STATFS_NOCREATE; CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; " "%llu objects: %llu free; state %x\n", 
osfs->os_blocks, osfs->os_bfree, osfs->os_bavail, @@ -6179,6 +6181,8 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, obd_obt_init(obd); if (test_bit(LMD_FLG_SKIP_LFSCK, lsi->lsi_lmd->lmd_flags)) m->mdt_skip_lfsck = 1; + if (test_bit(LMD_FLG_NO_CREATE, lsi->lsi_lmd->lmd_flags)) + m->mdt_lut.lut_no_create = 1; } /* Just try to get a DoM lock by default. Otherwise, having a group diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 39450f3..24bc276 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -892,6 +892,55 @@ static ssize_t enable_dir_auto_split_store(struct kobject *kobj, LUSTRE_RW_ATTR(enable_dir_auto_split); /** + * Show if the MDT is in no create mode. + * + * This means MDT has been administratively disabled to prevent it + * from creating any new directories on the MDT, though existing files + * and directories can still be read, written, and unlinked. + * + * \retval number of bytes written + */ +static ssize_t no_create_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", mdt->mdt_lut.lut_no_create); +} + +/** + * Set MDT to no create mode. + * + * This is used to interface to userspace administrative tools to + * disable new directory creation on the MDT. 
+ * + * \param[in] count \a buffer length + * + * \retval \a count on success + * \retval negative number on error + */ +static ssize_t no_create_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + bool val; + int rc; + + rc = kstrtobool(buffer, &val); + if (rc) + return rc; + + mdt->mdt_lut.lut_no_create = val; + + return count; +} +LUSTRE_RW_ATTR(no_create); + +/** * Show MDT async commit count. * * @m seq_file handle @@ -1685,6 +1734,7 @@ static struct attribute *mdt_attrs[] = { &lustre_attr_enable_striped_dir.attr, &lustre_attr_commit_on_sharing.attr, &lustre_attr_local_recovery.attr, + &lustre_attr_no_create.attr, &lustre_attr_async_commit_count.attr, &lustre_attr_sync_count.attr, &lustre_attr_dom_lock.attr, diff --git a/lustre/obdclass/lu_tgt_descs.c b/lustre/obdclass/lu_tgt_descs.c index 697ff5e..0062ded 100644 --- a/lustre/obdclass/lu_tgt_descs.c +++ b/lustre/obdclass/lu_tgt_descs.c @@ -425,7 +425,7 @@ EXPORT_SYMBOL(ltd_del_tgt); * Calculate penalties per-tgt and per-server * * Re-calculate penalties when the configuration changes, active targets - * change and after statfs refresh (all these are reflected by lq_dirty flag). + * change and after statfs refresh (all these are reflected by LQ_DIRTY flag). * On every tgt and server: decay the penalty by half for every 8x the update * interval that the device has been idle. 
That gives lots of time for the * statfs information to be updated (which the penalty is only a proxy for), diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index af6eebb..1d4a772 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1394,8 +1394,10 @@ int lmd_parse(char *options, struct lustre_mount_data *lmd) max_t(int, simple_strtoul(s1 + 19, NULL, 10), time_min); clear++; - } else if (strncmp(s1, "no_precreate", 12) == 0) { - set_bit(LMD_FLG_NO_PRECREATE, lmd->lmd_flags); + } else if (strncmp(s1, "no_create", 9) == 0 || + /* no_precreate kept for 2.16 compatibility */ + strncmp(s1, "no_precreate", 12) == 0) { + set_bit(LMD_FLG_NO_CREATE, lmd->lmd_flags); clear++; } else if (strncmp(s1, "noir", 4) == 0) { /* test case only */ set_bit(LMD_FLG_NOIR, lmd->lmd_flags); diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index 79091c4..f03d8d1 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -297,18 +297,18 @@ LUSTRE_RW_ATTR(degraded); * * \retval number of bytes written */ -static ssize_t no_precreate_show(struct kobject *kobj, struct attribute *attr, +static ssize_t no_create_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct obd_device *obd = container_of(kobj, struct obd_device, obd_kset.kobj); struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", ofd->ofd_no_precreate); + return scnprintf(buf, PAGE_SIZE, "%u\n", ofd->ofd_lut.lut_no_create); } /** - * Set OFD to no precreate mode. + * Set OFD to no create mode. * * This is used to interface to userspace administrative tools to * disable new object creation on the OST. 
@@ -318,7 +318,7 @@ static ssize_t no_precreate_show(struct kobject *kobj, struct attribute *attr, * \retval \a count on success * \retval negative number on error */ -static ssize_t no_precreate_store(struct kobject *kobj, struct attribute *attr, +static ssize_t no_create_store(struct kobject *kobj, struct attribute *attr, const char *buffer, size_t count) { struct obd_device *obd = container_of(kobj, struct obd_device, @@ -332,12 +332,19 @@ static ssize_t no_precreate_store(struct kobject *kobj, struct attribute *attr, return rc; spin_lock(&ofd->ofd_flags_lock); - ofd->ofd_no_precreate = val; + ofd->ofd_lut.lut_no_create = val; spin_unlock(&ofd->ofd_flags_lock); return count; } +LUSTRE_RW_ATTR(no_create); + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) +/* compatibility entry for a few releases */ +#define no_precreate_show no_create_show +#define no_precreate_store no_create_store LUSTRE_RW_ATTR(no_precreate); +#endif /** * Show OFD filesystem type. @@ -993,34 +1000,37 @@ LUSTRE_OBD_UINT_PARAM_ATTR(at_max); LUSTRE_OBD_UINT_PARAM_ATTR(at_history); static struct attribute *ofd_attrs[] = { - &lustre_attr_tot_dirty.attr, - &lustre_attr_tot_granted.attr, - &lustre_attr_tot_pending.attr, + &lustre_attr_access_log_mask.attr, + &lustre_attr_access_log_size.attr, + &lustre_attr_atime_diff.attr, + &lustre_attr_checksum_t10pi_enforce.attr, + &lustre_attr_degraded.attr, + &lustre_attr_eviction_count.attr, + &lustre_attr_fstype.attr, + &lustre_attr_grant_check_threshold.attr, &lustre_attr_grant_compat_disable.attr, + &lustre_attr_grant_precreate.attr, &lustre_attr_instance.attr, - &lustre_attr_recovery_time_hard.attr, - &lustre_attr_recovery_time_soft.attr, &lustre_attr_ir_factor.attr, + &lustre_attr_job_cleanup_interval.attr, + &lustre_attr_lfsck_speed_limit.attr, + &lustre_attr_no_create.attr, +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) + &lustre_attr_no_precreate.attr, +#endif &lustre_attr_num_exports.attr, - 
&lustre_attr_grant_check_threshold.attr, - &lustre_attr_eviction_count.attr, - &lustre_attr_seqs_allocated.attr, - &lustre_attr_grant_precreate.attr, &lustre_attr_precreate_batch.attr, - &lustre_attr_atime_diff.attr, - &lustre_attr_degraded.attr, - &lustre_attr_fstype.attr, - &lustre_attr_no_precreate.attr, + &lustre_attr_recovery_time_hard.attr, + &lustre_attr_recovery_time_soft.attr, + &lustre_attr_seqs_allocated.attr, + &lustre_attr_tot_dirty.attr, + &lustre_attr_tot_granted.attr, + &lustre_attr_tot_pending.attr, + &lustre_attr_soft_sync_limit.attr, &lustre_attr_sync_journal.attr, #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0) &lustre_attr_sync_on_lock_cancel.attr, #endif - &lustre_attr_soft_sync_limit.attr, - &lustre_attr_lfsck_speed_limit.attr, - &lustre_attr_access_log_mask.attr, - &lustre_attr_access_log_size.attr, - &lustre_attr_job_cleanup_interval.attr, - &lustre_attr_checksum_t10pi_enforce.attr, &lustre_attr_at_min.attr, &lustre_attr_at_max.attr, &lustre_attr_at_history.attr, diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index fe5660d..4eb489a 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -184,8 +184,8 @@ static int ofd_stack_init(const struct lu_env *env, if (lmd) { if (test_bit(LMD_FLG_SKIP_LFSCK, lmd->lmd_flags)) m->ofd_skip_lfsck = 1; - if (test_bit(LMD_FLG_NO_PRECREATE, lmd->lmd_flags)) - m->ofd_no_precreate = 1; + if (test_bit(LMD_FLG_NO_CREATE, lmd->lmd_flags)) + m->ofd_lut.lut_no_create = 1; bitmap_copy(lmd_flags, lmd->lmd_flags, LMD_FLG_NUM_FLAGS); } @@ -1468,7 +1468,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) if (CFS_FAIL_CHECK(OBD_FAIL_OST_EROFS)) RETURN(-EROFS); - if (ofd->ofd_no_precreate) + if (ofd->ofd_lut.lut_no_create) return -EPERM; repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY); diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 5634ca4..55e1639 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -142,7 +142,6 @@ struct 
ofd_device { ofd_lastid_rebuilding:1, ofd_record_fid_accessed:1, ofd_lfsck_verify_pfid:1, - ofd_no_precreate:1, ofd_skip_lfsck:1; struct seq_server_site ofd_seq_site; /* the limit of SOFT_SYNC RPCs that will trigger a soft sync */ diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 494ee2f..acf0bcf 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -773,12 +773,12 @@ int ofd_statfs(const struct lu_env *env, struct obd_export *exp, osfs->os_ffree -= osfs->os_ffree; } - /* OS_STATFS_READONLY can be set by OSD already */ + /* OS_STATFS_READONLY can be set by OSD already, only add flags */ if (ofd->ofd_raid_degraded) osfs->os_state |= OS_STATFS_DEGRADED; - if (ofd->ofd_no_precreate) - osfs->os_state |= OS_STATFS_NOPRECREATE; + if (ofd->ofd_lut.lut_no_create) + osfs->os_state |= OS_STATFS_NOCREATE; if (obd->obd_self_export != exp && !exp_grant_param_supp(exp) && current_blockbits > COMPAT_BSIZE_SHIFT) { @@ -1034,7 +1034,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, ENTRY; - if (ofd->ofd_no_precreate) + if (ofd->ofd_lut.lut_no_create) return -EPERM; ofd_info_init(env, exp); diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index 0bd9548..d619351 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -1141,7 +1141,7 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs) /* Object precreation skipped on OST if manually disabled */ if (d->opd_pre_max_create_count == 0) - msfs->os_state |= OS_STATFS_NOPRECREATE; + msfs->os_state |= OS_STATFS_NOCREATE; /* else don't clear flags in new msfs->os_state sent from OST */ update: diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 1da31b5..3a67e93 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -2086,8 +2086,8 @@ void lustre_assert_wire_constants(void) (unsigned)OS_STATFS_DEGRADED); LASSERTF(OS_STATFS_READONLY == 0x00000002UL, "found 0x%.8xUL\n", 
(unsigned)OS_STATFS_READONLY); - LASSERTF(OS_STATFS_NOPRECREATE == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)OS_STATFS_NOPRECREATE); + LASSERTF(OS_STATFS_NOCREATE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)OS_STATFS_NOCREATE); LASSERTF(OS_STATFS_ENOSPC == 0x00000020UL, "found 0x%.8xUL\n", (unsigned)OS_STATFS_ENOSPC); LASSERTF(OS_STATFS_ENOINO == 0x00000040UL, "found 0x%.8xUL\n", diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 1bb9346..0960ee2 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -133,12 +133,15 @@ stop_mdt() { } start_mds() { + local mdscount=$MDSCOUNT local num - for num in $(seq $MDSCOUNT); do + [[ "$1" == "--mdscount" ]] && mdscount=$2 && shift 2 + + for ((num=1; num <= $mdscount; num++ )); do start_mdt $num "$@" || return 94 done - for num in $(seq $MDSCOUNT); do + for ((num=1; num <= $mdscount; num++ )); do wait_clients_import_state ${CLIENTS:-$HOSTNAME} mds${num} FULL done } @@ -9241,15 +9244,22 @@ test_111() { } run_test 111 "Adding large_dir with over 2GB directory" -test_112() { +test_112a() { + local param="no_create" + + (( $OST1_VERSION > $(version_code 2.14.0) )) || + skip "need OSS at least 2.14.0" + (( $OST1_VERSION >= $(version_code 2.15.56.125) )) || + param="no_precreate" + start_mds || error "MDS start failed" start_ost || error "OSS start failed" echo "start ost2 service on $(facet_active_host ost2)" - start ost2 $(ostdevname 2) $(csa_add "$OST_MOUNT_OPTS" -o no_precreate) || + start ost2 $(ostdevname 2) $(csa_add "$OST_MOUNT_OPTS" -o $param) || error "start ost2 facet failed" local val=$(do_facet ost2 \ - "$LCTL get_param -n obdfilter.$FSNAME-OST0001*.no_precreate") - (( $val == 1 )) || error "obdfilter.$FSNAME-OST0001*.no_precreate=$val" + "$LCTL get_param -n obdfilter.$FSNAME-OST0001*.$param") + (( $val == 1 )) || error "obdfilter.$FSNAME-OST0001*.$param=$val" mount_client $MOUNT || error "mount client failed" wait_osc_import_state mds1 ost1 FULL @@ -9261,7 
+9271,11 @@ test_112() { $LFS setstripe -i 1 $DIR/$tfile.1 && $LFS getstripe $DIR/$tfile.1 && (( $($LFS getstripe -i $DIR/$tfile.1) == 1 )) && error "allowed to create $tfile.1 on OST0001" - do_facet ost2 $LCTL set_param obdfilter.*.no_precreate=0 + $LFS df -v $MOUNT + $LFS df -v $MOUNT | grep -q "OST:1.*N" || + error "NOCREATE not in 'lfs df'" + + do_facet ost2 $LCTL set_param obdfilter.$FSNAME-OST0001*.$param=0 sleep_maxage $LFS setstripe -i 1 $DIR/$tfile.2 || error "failed to create $tfile.2 on ost1 facet" @@ -9270,7 +9284,55 @@ test_112() { stop_ost2 || error "stop ost2 facet failed" cleanup } -run_test 112 "mount OST with nocreate option" +run_test 112a "mount OST with no_create option" + +test_112b() { + (( MDSCOUNT >= 2 )) || skip "need at least 2 MDTs" + (( $MDS1_VERSION >= $(version_code 2.15.56.125) )) || + skip "need MDS >= 2.15.56.125" + local mdsnum=$MDSCOUNT + local facet=mds$mdsnum + local mdtidx=$((mdsnum - 1)) + local mdtname=$FSNAME-MDT$(printf %04x $mdtidx) + + start_mds --mdscount $((mdsnum - 1)) || error "MDS start failed" + start_mdt $mdsnum -o no_create || error "start $facet failed" + local val=$(do_facet $facet \ + "$LCTL get_param -n mdt.$mdtname*.no_create") + (( $val == 1 )) || error "mdt.$mdtname*.no_create=$val" + start_ost || error "ost1 start failed" + start_ost2 || error "ost1 start failed" + + mount_client $MOUNT || error "mount client failed" + wait_osc_import_ready $facet ost2 + + $LFS df -v $MOUNT + $LFS df -v $MOUNT | grep -q "MDT:$mdtidx.*N" || + error "NOCREATE not in 'lfs df'" + + $LFS mkdir -i $mdtidx $DIR/$tdir || + $LFS setdirstripe -D -c 1 -i -1 --max-inherit-rr 2 $DIR/$tdir || + error "error creating $tdir on $mdtname" + stack_trap "rm -rf $DIR/$tdir" + + mkdir $DIR/$tdir/d1.{1..100} || error "mkdir $tdir/d1.{1..100} failed" + $LFS getdirstripe -i $DIR/$tdir/d1.* | sort | uniq -c + do_facet $facet $LCTL set_param mdt.$mdtname*.no_create=0 + # allow one initial create for delayed statfs on client + (( $($LFS getdirstripe 
-i $DIR/$tdir/d1.* | grep -c $mdtidx) < 2 )) || + error "allowed create on $mdtname" + sleep_maxage_lmv + + mkdir $DIR/$tdir/d2.{1..100} || error "mkdir $tdir/d2.{1..100} failed" + $LFS getdirstripe -i $DIR/$tdir/d2.{1..100} | sort | uniq -c + (( $($LFS getdirstripe -i $DIR/$tdir/d2.* | grep -c $mdtidx) > 10 )) || + error "no create on $mdtname" + # files not cleaned with ONLY_REPEAT because of client unmount below + rm -r $DIR/$tdir + stop_ost2 || error "ost1 start failed" + cleanup +} +run_test 112b "mount MDT with no_create option" # Global for 113 SAVE_MGS_MOUNT_OPTS=$MGS_MOUNT_OPTS diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 6a3daeb..e4d5a29 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -11155,8 +11155,14 @@ rmultiop_stop() { } sleep_maxage() { - local delay=$(do_facet $SINGLEMDS lctl get_param -n lo[vd].*.qos_maxage | - awk '{ print $1 * 2; exit; }') + local delay=$(do_facet mds1 lctl get_param -n lod.*.qos_maxage | + awk '{ print $1 + 5; exit; }') + sleep $delay +} + +sleep_maxage_lmv() { + local delay=$(lctl get_param -n lmv.*.qos_maxage | + awk '{ print $1 + 5; exit; }') sleep $delay } diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 87828b4..14832cb 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -6691,7 +6691,7 @@ static struct obd_statfs_state_names { } oss_names[] = { { .osn_state = OS_STATFS_DEGRADED, .osn_name = 'D', .osn_err = true }, { .osn_state = OS_STATFS_READONLY, .osn_name = 'R', .osn_err = true }, - { .osn_state = OS_STATFS_NOPRECREATE,.osn_name = 'N', .osn_err = true }, + { .osn_state = OS_STATFS_NOCREATE, .osn_name = 'N', .osn_err = true }, { .osn_state = OS_STATFS_UNUSED1, .osn_name = '?', .osn_err = true }, { .osn_state = OS_STATFS_UNUSED2, .osn_name = '?', .osn_err = true }, { .osn_state = OS_STATFS_ENOSPC, .osn_name = 'S', .osn_err = true }, diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index fcc6a7f..afdc7e0 100644 --- 
a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -1027,7 +1027,7 @@ check_obd_statfs(void) CHECK_VALUE_X(OS_STATFS_DEGRADED); CHECK_VALUE_X(OS_STATFS_READONLY); - CHECK_VALUE_X(OS_STATFS_NOPRECREATE); + CHECK_VALUE_X(OS_STATFS_NOCREATE); CHECK_VALUE_X(OS_STATFS_ENOSPC); CHECK_VALUE_X(OS_STATFS_ENOINO); CHECK_VALUE_X(OS_STATFS_SUM); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 905e71a..ed3b7c3 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -2147,8 +2147,8 @@ void lustre_assert_wire_constants(void) (unsigned)OS_STATFS_DEGRADED); LASSERTF(OS_STATFS_READONLY == 0x00000002UL, "found 0x%.8xUL\n", (unsigned)OS_STATFS_READONLY); - LASSERTF(OS_STATFS_NOPRECREATE == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)OS_STATFS_NOPRECREATE); + LASSERTF(OS_STATFS_NOCREATE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)OS_STATFS_NOCREATE); LASSERTF(OS_STATFS_ENOSPC == 0x00000020UL, "found 0x%.8xUL\n", (unsigned)OS_STATFS_ENOSPC); LASSERTF(OS_STATFS_ENOINO == 0x00000040UL, "found 0x%.8xUL\n", -- 1.8.3.1