From: Patrick Farrell Date: Wed, 28 Aug 2019 16:54:37 +0000 (-0400) Subject: LU-9341 lod: Add special O_APPEND striping X-Git-Tag: 2.12.90~125 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e2ac6e1eaa108eef3493837e9bd881629582ea1d LU-9341 lod: Add special O_APPEND striping Files opened with O_APPEND are almost always log files, which generally stay small and do not benefit from being striped widely. Additionally, PFL files accessed with O_APPEND are fully instantiated, meaning that, because the files usually stay small, these objects are usually wasted. This patch adds special striping for files created with O_APPEND. This is controlled on the MDS by two new tunable parameters: mdd.*.append_stripe_count and mdd.*.append_pool. If the stripe count is set to 0 and the pool is not set, this functionality is disabled and files created with O_APPEND will be striped like any other file. Signed-off-by: Patrick Farrell Change-Id: I433d1b8c80488a851b8eb26c78cf5519a6cd75bf Reviewed-on: https://review.whamcloud.com/35617 Reviewed-by: Andreas Dilger Reviewed-by: Mike Pershin Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index efe5c3e..9e9f1db 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -389,6 +389,8 @@ struct dt_allocation_hint { const void *dah_eadata; int dah_eadata_len; __u32 dah_mode; + int dah_append_stripes; + char *dah_append_pool; }; /** diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 71b0fa1..13a7e31 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -1076,12 +1076,16 @@ static int lod_attr_get(const struct lu_env *env, } static inline void lod_adjust_stripe_info(struct lod_layout_component *comp, - struct lov_desc *desc) + struct lov_desc *desc, + int append_stripes) { if (comp->llc_pattern != LOV_PATTERN_MDT) { - if (!comp->llc_stripe_count) + if (append_stripes) { + comp->llc_stripe_count 
= append_stripes; + } else if (!comp->llc_stripe_count) { comp->llc_stripe_count = desc->ld_default_stripe_count; + } } if (comp->llc_stripe_size <= 0) comp->llc_stripe_size = desc->ld_default_stripe_size; @@ -2804,7 +2808,7 @@ static int lod_declare_layout_add(const struct lu_env *env, lod_comp->llc_stripe_count = v1->lmm_stripe_count; lod_comp->llc_stripe_size = v1->lmm_stripe_size; - lod_adjust_stripe_info(lod_comp, desc); + lod_adjust_stripe_info(lod_comp, desc, 0); if (v1->lmm_magic == LOV_USER_MAGIC_V3) { v3 = (struct lov_user_md_v3 *) v1; @@ -4463,7 +4467,8 @@ out: static int lod_get_default_lov_striping(const struct lu_env *env, struct lod_object *lo, - struct lod_default_striping *lds); + struct lod_default_striping *lds, + struct dt_allocation_hint *ah); /** * Implementation of dt_object_operations::do_xattr_set. * @@ -4519,7 +4524,8 @@ static int lod_xattr_set(const struct lu_env *env, bool is_del; /* get existing striping config */ - rc = lod_get_default_lov_striping(env, lod_dt_obj(dt), lds); + rc = lod_get_default_lov_striping(env, lod_dt_obj(dt), lds, + NULL); if (rc) RETURN(rc); @@ -4798,7 +4804,8 @@ skip: */ static int lod_get_default_lov_striping(const struct lu_env *env, struct lod_object *lo, - struct lod_default_striping *lds) + struct lod_default_striping *lds, + struct dt_allocation_hint *ah) { struct lod_thread_info *info = lod_env_info(env); struct lov_user_md_v1 *v1 = NULL; @@ -4841,8 +4848,9 @@ static int lod_get_default_lov_striping(const struct lu_env *env, v1->lmm_magic != LOV_USER_MAGIC_SPECIFIC) RETURN(-ENOTSUPP); - if (v1->lmm_magic == LOV_MAGIC_COMP_V1 || - v1->lmm_magic == LOV_MAGIC_SEL) { + if ((v1->lmm_magic == LOV_MAGIC_COMP_V1 || + v1->lmm_magic == LOV_MAGIC_SEL) && + !(ah && ah->dah_append_stripes)) { comp_v1 = (struct lov_comp_md_v1 *)v1; comp_cnt = comp_v1->lcm_entry_count; if (comp_cnt == 0) @@ -4893,19 +4901,24 @@ static int lod_get_default_lov_striping(const struct lu_env *env, RETURN(-EINVAL); } - CDEBUG(D_LAYOUT, DFID" 
stripe_count=%d stripe_size=%d " - "stripe_offset=%d\n", + CDEBUG(D_LAYOUT, DFID" stripe_count=%d stripe_size=%d stripe_offset=%d append_stripes=%d\n", PFID(lu_object_fid(&lo->ldo_obj.do_lu)), (int)v1->lmm_stripe_count, (int)v1->lmm_stripe_size, - (int)v1->lmm_stripe_offset); + (int)v1->lmm_stripe_offset, + ah ? ah->dah_append_stripes : 0); - lod_comp->llc_stripe_count = v1->lmm_stripe_count; + if (ah && ah->dah_append_stripes) + lod_comp->llc_stripe_count = ah->dah_append_stripes; + else + lod_comp->llc_stripe_count = v1->lmm_stripe_count; lod_comp->llc_stripe_size = v1->lmm_stripe_size; lod_comp->llc_stripe_offset = v1->lmm_stripe_offset; lod_comp->llc_pattern = v1->lmm_pattern; pool = NULL; - if (v1->lmm_magic == LOV_USER_MAGIC_V3) { + if (ah && ah->dah_append_pool && ah->dah_append_pool[0]) { + pool = ah->dah_append_pool; + } else if (v1->lmm_magic == LOV_USER_MAGIC_V3) { /* XXX: sanity check here */ v3 = (struct lov_user_md_v3 *) v1; if (v3->lmm_pool_name[0] != '\0') @@ -4987,7 +5000,7 @@ static int lod_get_default_striping(const struct lu_env *env, { int rc, rc1; - rc = lod_get_default_lov_striping(env, lo, lds); + rc = lod_get_default_lov_striping(env, lo, lds, NULL); rc1 = lod_get_default_lmv_striping(env, lo, lds); if (rc == 0 && rc1 < 0) rc = rc1; @@ -5070,7 +5083,7 @@ static void lod_striping_from_default(struct lod_object *lo, if (!lo->ldo_is_composite) continue; - lod_adjust_stripe_info(obj_comp, desc); + lod_adjust_stripe_info(obj_comp, desc, 0); } } else if (lds->lds_dir_def_striping_set && S_ISDIR(mode)) { if (lo->ldo_dir_stripe_count == 0) @@ -5090,7 +5103,8 @@ static void lod_striping_from_default(struct lod_object *lo, } } -static inline bool lod_need_inherit_more(struct lod_object *lo, bool from_root) +static inline bool lod_need_inherit_more(struct lod_object *lo, bool from_root, + char *append_pool) { struct lod_layout_component *lod_comp; @@ -5110,6 +5124,9 @@ static inline bool lod_need_inherit_more(struct lod_object *lo, bool from_root) 
lod_comp->llc_stripe_offset == LOV_OFFSET_DEFAULT)) return true; + if (append_pool && append_pool[0]) + return true; + return false; } @@ -5144,6 +5161,9 @@ static void lod_ah_init(const struct lu_env *env, LASSERT(child); + if (ah->dah_append_stripes == -1) + ah->dah_append_stripes = d->lod_desc.ld_tgt_count; + if (likely(parent)) { nextp = dt_object_child(parent); lp = lod_dt_obj(parent); @@ -5245,7 +5265,7 @@ static void lod_ah_init(const struct lu_env *env, * Try from the parent first. */ if (likely(lp != NULL)) { - rc = lod_get_default_lov_striping(env, lp, lds); + rc = lod_get_default_lov_striping(env, lp, lds, ah); if (rc == 0) lod_striping_from_default(lc, lds, child_mode); } @@ -5273,8 +5293,10 @@ static void lod_ah_init(const struct lu_env *env, * - parent has plain(v1/v3) default layout, and some attributes * are not specified in the default layout; */ - if (d->lod_md_root != NULL && lod_need_inherit_more(lc, true)) { - rc = lod_get_default_lov_striping(env, d->lod_md_root, lds); + if (d->lod_md_root != NULL && + lod_need_inherit_more(lc, true, ah->dah_append_pool)) { + rc = lod_get_default_lov_striping(env, d->lod_md_root, lds, + ah); if (rc) goto out; if (lc->ldo_comp_cnt == 0) { @@ -5305,7 +5327,7 @@ out: * fs default striping may not be explicitly set, or historically set * in config log, use them. 
*/ - if (lod_need_inherit_more(lc, false)) { + if (lod_need_inherit_more(lc, false, ah->dah_append_pool)) { if (lc->ldo_comp_cnt == 0) { rc = lod_alloc_comp_entries(lc, 0, 1); if (rc) @@ -5319,7 +5341,9 @@ out: LASSERT(!lc->ldo_is_composite); lod_comp = &lc->ldo_comp_entries[0]; desc = &d->lod_desc; - lod_adjust_stripe_info(lod_comp, desc); + lod_adjust_stripe_info(lod_comp, desc, ah->dah_append_stripes); + if (ah->dah_append_pool && ah->dah_append_pool[0]) + lod_obj_set_pool(lc, 0, ah->dah_append_pool); } EXIT; diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 1a9a8e6..54fe466 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -154,6 +154,9 @@ static int mdd_init0(const struct lu_env *env, struct mdd_device *mdd, mdd->mdd_changelog_min_gc_interval = CHLOG_MIN_GC_INTERVAL; /* with a very few number of free catalog entries */ mdd->mdd_changelog_min_free_cat_entries = CHLOG_MIN_FREE_CAT_ENTRIES; + /* special default striping for files created with O_APPEND */ + mdd->mdd_append_stripe_count = 1; + mdd->mdd_append_pool[0] = '\0'; dt_conf_get(env, mdd->mdd_child, &mdd->mdd_dt_conf); diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index ac8ecc3..1d70d20 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -150,6 +150,8 @@ struct mdd_device { struct mdd_dot_lustre_objs mdd_dot_lustre_objs; unsigned int mdd_sync_permission; int mdd_connects; + int mdd_append_stripe_count; + char mdd_append_pool[LOV_MAXPOOLNAME + 1]; struct local_oid_storage *mdd_los; struct mdd_generic_thread mdd_orphan_cleanup_thread; struct kobject mdd_kobj; diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c index 3ab3e9d..f18a6ff 100644 --- a/lustre/mdd/mdd_lproc.c +++ b/lustre/mdd/mdd_lproc.c @@ -533,6 +533,117 @@ static int mdd_lfsck_layout_seq_show(struct seq_file *m, void *data) } LDEBUGFS_SEQ_FOPS_RO(mdd_lfsck_layout); +/** + * Show default number of stripes for O_APPEND files. 
+ * + * \param[in] kobj kobject embedded in the mdd_device + * \param[in] attr sysfs attribute (unused) + * \param[in] buf output buffer + * + * \retval length of the formatted value, as returned by snprintf() + */ +static ssize_t append_stripe_count_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct mdd_device *mdd = container_of(kobj, struct mdd_device, + mdd_kobj); + + return snprintf(buf, PAGE_SIZE, "%d\n", mdd->mdd_append_stripe_count); +} + +/** + * Set default number of stripes for O_APPEND files. + * + * \param[in] kobj kobject embedded in the mdd_device + * \param[in] attr sysfs attribute (unused) + * \param[in] buffer string containing the default number of stripes + * for new O_APPEND files (values below -1 are rejected) + * \param[in] count @buffer length + * + * \retval @count on success + * \retval negative error code otherwise + */ +static ssize_t append_stripe_count_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct mdd_device *mdd = container_of(kobj, struct mdd_device, + mdd_kobj); + int stripe_count; + int rc; + + rc = kstrtoint(buffer, 0, &stripe_count); + if (rc) + return rc; + + if (stripe_count < -1) + return -ERANGE; + + mdd->mdd_append_stripe_count = stripe_count; + + return count; +} +LUSTRE_RW_ATTR(append_stripe_count); + +/** + * Show default OST pool for O_APPEND files. + * + * \param[in] kobj kobject embedded in the mdd_device + * \param[in] attr sysfs attribute (unused) + * \param[in] buf output buffer + * + * \retval length of the formatted pool name, as returned by snprintf() + * NOTE(review): size LOV_MAXPOOLNAME + 1 drops the '\n' for a max-length name + */ +static ssize_t append_pool_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct mdd_device *mdd = container_of(kobj, struct mdd_device, + mdd_kobj); + + return snprintf(buf, LOV_MAXPOOLNAME + 1, "%s\n", mdd->mdd_append_pool); +} + +/** + * Set default OST pool for O_APPEND files. 
+ * + * \param[in] kobj kobject embedded in the mdd_device + * \param[in] attr sysfs attribute (unused) + * \param[in] buffer user-supplied pool name, or "none" to clear the pool + * \param[in] count @buffer length + * + * \retval @count on success + * \retval negative error code otherwise + */ +static ssize_t append_pool_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct mdd_device *mdd = container_of(kobj, struct mdd_device, + mdd_kobj); + + if (!count || count > LOV_MAXPOOLNAME + 1) + return -EINVAL; + + /* clear previous value */ + memset(mdd->mdd_append_pool, 0, LOV_MAXPOOLNAME + 1); + + /* entering "none" clears the pool, otherwise copy the new pool; NOTE(review): only the first 4 bytes are compared, so any name starting with "none" also clears it */ + if (strncmp("none", buffer, 4)) { + memcpy(mdd->mdd_append_pool, buffer, count); + + /* Trim the trailing '\n' if any; NOTE(review): without one, count == LOV_MAXPOOLNAME + 1 leaves the name with no NUL terminator */ + if (mdd->mdd_append_pool[count - 1] == '\n') { + /* Don't echo just a newline; NOTE(review): the old value is already cleared and the '\n' already stored when this fails */ + if (count == 1) + return -EINVAL; + mdd->mdd_append_pool[count - 1] = 0; + } + } + + return count; +} +LUSTRE_RW_ATTR(append_pool); + static struct lprocfs_vars lprocfs_mdd_obd_vars[] = { { .name = "changelog_mask", .fops = &mdd_changelog_mask_fops }, @@ -558,6 +669,8 @@ static struct attribute *mdd_attrs[] = { &lustre_attr_lfsck_async_windows.attr, &lustre_attr_lfsck_speed_limit.attr, &lustre_attr_sync_permission.attr, + &lustre_attr_append_stripe_count.attr, + &lustre_attr_append_pool.attr, NULL, }; diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index e4357ec..364474c 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -2970,6 +2970,7 @@ void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent, struct dt_allocation_hint *hint) { struct dt_object *np = parent ? 
mdd_object_child(parent) : NULL; + struct mdd_device *mdd = mdd_obj2mdd_dev(child); struct dt_object *nc = mdd_object_child(child); memset(hint, 0, sizeof(*hint)); @@ -2983,6 +2984,19 @@ void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent, } else { hint->dah_eadata = NULL; hint->dah_eadata_len = 0; + if (spec->sp_cr_flags & MDS_OPEN_APPEND) { + if (mdd->mdd_append_stripe_count != 0 || + mdd->mdd_append_pool[0]) + CDEBUG(D_INFO, + "using O_APPEND file striping\n"); + if (mdd->mdd_append_stripe_count) + hint->dah_append_stripes = + mdd->mdd_append_stripe_count; + if (mdd->mdd_append_pool[0]) + hint->dah_append_pool = mdd->mdd_append_pool; + } else { + hint->dah_append_stripes = 0; + } } CDEBUG(D_INFO, DFID" eadata %p len %d\n", PFID(mdd_object_fid(child)), diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 3ba63bc..2735861 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2828,6 +2828,142 @@ test_27L() { } run_test 27L "lfs pool_list gives correct pool name" +test_27M() { + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.12.57) ]] && + skip "Need MDS version >= than 2.12.57" + remote_mds_nodsh && skip "remote MDS with nodsh" + [[ $OSTCOUNT -lt 2 ]] && skip_env "need > 1 OST" + + test_mkdir $DIR/$tdir + + # Set default striping on directory + $LFS setstripe -C 4 $DIR/$tdir + + echo 1 > $DIR/$tdir/${tfile}.1 + local count=$($LFS getstripe -c $DIR/$tdir/${tfile}.1) + local setcount=4 + [ $count -eq $setcount ] || + error "(1) stripe count $count, should be $setcount" + + # Capture existing append_stripe_count setting for restore + local orig_count=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_stripe_count) + local mdts=$(comma_list $(mdts_nodes)) + stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=$orig_count" EXIT + + local appendcount=$orig_count + echo 1 >> $DIR/$tdir/${tfile}.2_append + count=$($LFS getstripe -c $DIR/$tdir/${tfile}.2_append) + [ $count -eq 
$appendcount ] || + error "(2)stripe count $count, should be $appendcount for append" + + # Disable O_APPEND striping, verify it works + do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=0 + + # Should now get the default striping, which is 4 + setcount=4 + echo 1 >> $DIR/$tdir/${tfile}.3_append + count=$($LFS getstripe -c $DIR/$tdir/${tfile}.3_append) + [ $count -eq $setcount ] || + error "(3) stripe count $count, should be $setcount" + + # Try changing the stripe count for append files + do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=2 + + # Append striping is now 2 (directory default is still 4) + appendcount=2 + echo 1 >> $DIR/$tdir/${tfile}.4_append + count=$($LFS getstripe -c $DIR/$tdir/${tfile}.4_append) + [ $count -eq $appendcount ] || + error "(4) stripe count $count, should be $appendcount for append" + + # Test append stripe count of -1 (expect one stripe per OST) + do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=-1 + appendcount=$OSTCOUNT + echo 1 >> $DIR/$tdir/${tfile}.5 + count=$($LFS getstripe -c $DIR/$tdir/${tfile}.5) + [ $count -eq $appendcount ] || + error "(5) stripe count $count, should be $appendcount for append" + + # Set append striping back to default of 1 + do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=1 + + # Try a new default striping, PFL + DOM + $LFS setstripe -L mdt -E 1M -E -1 -c 2 $DIR/$tdir + + # Create normal DOM file, DOM returns stripe count == 0 + setcount=0 + touch $DIR/$tdir/${tfile}.6 + count=$($LFS getstripe -c $DIR/$tdir/${tfile}.6) + [ $count -eq $setcount ] || + error "(6) stripe count $count, should be $setcount" + + # Show that O_APPEND striping overrides the PFL + DOM default layout + appendcount=1 + echo 1 >> $DIR/$tdir/${tfile}.7_append + count=$($LFS getstripe -c $DIR/$tdir/${tfile}.7_append) + [ $count -eq $appendcount ] || + error "(7) stripe count $count, should be $appendcount for append" + + # Clean up DOM layout + $LFS setstripe -d $DIR/$tdir + + # Now test that append striping works when layout is from root + $LFS setstripe -c 2 $MOUNT + # Make a special 
directory for this + mkdir $DIR/${tdir}/${tdir}.2 + stack_trap "$LFS setstripe -d $MOUNT" EXIT + + # Verify for normal file + setcount=2 + echo 1 > $DIR/${tdir}/${tdir}.2/${tfile}.8 + count=$($LFS getstripe -c $DIR/$tdir/${tdir}.2/${tfile}.8) + [ $count -eq $setcount ] || + error "(8) stripe count $count, should be $setcount" + + appendcount=1 + echo 1 >> $DIR/${tdir}/${tdir}.2/${tfile}.9_append + count=$($LFS getstripe -c $DIR/${tdir}/${tdir}.2/${tfile}.9_append) + [ $count -eq $appendcount ] || + error "(9) stripe count $count, should be $appendcount for append" + + # Now test O_APPEND striping with pools + do_nodes $mdts $LCTL set_param mdd.*.append_pool="$TESTNAME" + stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_pool='none'" EXIT + + # Create the pool + pool_add $TESTNAME || error "pool creation failed" + pool_add_targets $TESTNAME 0 1 || error "Pool add targets failed" + + echo 1 >> $DIR/$tdir/${tfile}.10_append + + pool=$($LFS getstripe -p $DIR/$tdir/${tfile}.10_append) + [ "$pool" = "$TESTNAME" ] || error "(10) incorrect pool: $pool" + + # Check that count is still correct + appendcount=1 + echo 1 >> $DIR/$tdir/${tfile}.11_append + count=$($LFS getstripe -c $DIR/$tdir/${tfile}.11_append) + [ $count -eq $appendcount ] || + error "(11) stripe count $count, should be $appendcount for append" + + # Disable O_APPEND stripe count, verify pool works separately + do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=0 + + echo 1 >> $DIR/$tdir/${tfile}.12_append + + pool=$($LFS getstripe -p $DIR/$tdir/${tfile}.12_append) + [ "$pool" = "$TESTNAME" ] || error "(12) incorrect pool: $pool" + + # Remove pool setting, verify it's not applied + do_nodes $mdts $LCTL set_param mdd.*.append_pool='none' + + echo 1 >> $DIR/$tdir/${tfile}.13_append + + pool=$($LFS getstripe -p $DIR/$tdir/${tfile}.13_append) + [ "$pool" = "" ] || error "(13) pool found: $pool" +} +run_test 27M "test O_APPEND striping" + # createtest also checks that device nodes are created and # then 
visible correctly (#2091) test_28() { # bug 2091