From 03963106926883cf322e085feb8caa3ea64db1d1 Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Thu, 7 Apr 2022 11:31:07 -0500 Subject: [PATCH] LU-15727 lod: honor append_pool with default composite layouts In lod_get_default_lov_striping(), correct the handling of composite default layouts in the case where append_stripe_count is nonzero. Align the names of the append members of struct dt_allocation_hint with the mdd params. Remove the unused dah_mode member of struct dt_allocation_hint. Add sanity test_27U() to verify. Fixes: e2ac6e1eaa ("LU-9341 lod: Add special O_APPEND striping") Signed-off-by: John L. Hammond Change-Id: I66b426d24d6476fb483397f290229983f3da4be5 Reviewed-on: https://review.whamcloud.com/47014 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Patrick Farrell Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/dt_object.h | 5 +- lustre/lod/lod_internal.h | 6 -- lustre/lod/lod_object.c | 188 ++++++++++++++++++++++----------------- lustre/mdd/mdd_object.c | 20 +---- lustre/osd-ldiskfs/osd_handler.c | 1 - lustre/osd-zfs/osd_object.c | 1 - lustre/osp/osp_md_object.c | 1 - lustre/tests/sanity.sh | 85 +++++++++++++++--- 8 files changed, 186 insertions(+), 121 deletions(-) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 3df2027..fb28777 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -412,12 +412,11 @@ extern const struct dt_index_features dt_nodemap_features; struct dt_allocation_hint { struct dt_object *dah_parent; const void *dah_eadata; + const char *dah_append_pool; int dah_eadata_len; + int dah_append_stripe_count; int dah_acl_len; - __u32 dah_mode; - int dah_append_stripes; bool dah_can_block; - char *dah_append_pool; }; /** diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index dbeee1b..bdab936 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -363,12 +363,6 @@ static inline int lod_set_pool(char **pool, const char *new_pool) 
return 0; } -static inline int lod_set_def_pool(struct lod_default_striping *lds, - int i, const char *new_pool) -{ - return lod_set_pool(&lds->lds_def_comp_entries[i].llc_pool, new_pool); -} - static inline int lod_obj_set_pool(struct lod_object *lo, int i, const char *new_pool) { diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 019236e..a2d36fa 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -5303,15 +5303,21 @@ skip: static int lod_get_default_lov_striping(const struct lu_env *env, struct lod_object *lo, struct lod_default_striping *lds, - struct dt_allocation_hint *ah) + struct dt_allocation_hint *dah) { struct lod_thread_info *info = lod_env_info(env); struct lov_user_md_v1 *v1 = NULL; struct lov_user_md_v3 *v3 = NULL; - struct lov_comp_md_v1 *comp_v1 = NULL; - __u16 comp_cnt; - __u16 mirror_cnt; - bool composite; + struct lov_comp_md_v1 *lcm = NULL; + __u32 magic; + int append_stripe_count = dah != NULL ? dah->dah_append_stripe_count : 0; + const char *append_pool = (dah != NULL && + dah->dah_append_pool != NULL && + dah->dah_append_pool[0] != '\0') ? 
+ dah->dah_append_pool : NULL; + __u16 entry_count = 1; + __u16 mirror_count = 0; + bool want_composite = false; int rc, i, j; ENTRY; @@ -5325,116 +5331,133 @@ static int lod_get_default_lov_striping(const struct lu_env *env, if (rc < (typeof(rc))sizeof(struct lov_user_md)) RETURN(0); - v1 = info->lti_ea_store; - if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1)) { - lustre_swab_lov_user_md_v1(v1); - } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3)) { - v3 = (struct lov_user_md_v3 *)v1; - lustre_swab_lov_user_md_v3(v3); - } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_SPECIFIC)) { - v3 = (struct lov_user_md_v3 *)v1; + magic = *(__u32 *)info->lti_ea_store; + if (magic == __swab32(LOV_USER_MAGIC_V1)) { + lustre_swab_lov_user_md_v1(info->lti_ea_store); + } else if (magic == __swab32(LOV_USER_MAGIC_V3)) { + lustre_swab_lov_user_md_v3(info->lti_ea_store); + } else if (magic == __swab32(LOV_USER_MAGIC_SPECIFIC)) { + v3 = (struct lov_user_md_v3 *)info->lti_ea_store; lustre_swab_lov_user_md_v3(v3); lustre_swab_lov_user_md_objects(v3->lmm_objects, v3->lmm_stripe_count); - } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_COMP_V1) || - v1->lmm_magic == __swab32(LOV_USER_MAGIC_SEL)) { - comp_v1 = (struct lov_comp_md_v1 *)v1; - lustre_swab_lov_comp_md_v1(comp_v1); + } else if (magic == __swab32(LOV_USER_MAGIC_COMP_V1) || + magic == __swab32(LOV_USER_MAGIC_SEL)) { + lustre_swab_lov_comp_md_v1(info->lti_ea_store); } - if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1 && - v1->lmm_magic != LOV_MAGIC_COMP_V1 && - v1->lmm_magic != LOV_MAGIC_SEL && - v1->lmm_magic != LOV_USER_MAGIC_SPECIFIC) + switch (*(__u32 *)info->lti_ea_store) { /* re-read after swab */ + case LOV_MAGIC_V1: + case LOV_MAGIC_V3: + case LOV_USER_MAGIC_SPECIFIC: + v1 = info->lti_ea_store; + break; + case LOV_MAGIC_COMP_V1: + case LOV_MAGIC_SEL: + lcm = info->lti_ea_store; + entry_count = lcm->lcm_entry_count; + if (entry_count == 0) + RETURN(-EINVAL); + + mirror_count = lcm->lcm_mirror_count + 1; + want_composite = true; + break; +
default: RETURN(-ENOTSUPP); + } - if ((v1->lmm_magic == LOV_MAGIC_COMP_V1 || - v1->lmm_magic == LOV_MAGIC_SEL) && - !(ah && ah->dah_append_stripes)) { - comp_v1 = (struct lov_comp_md_v1 *)v1; - comp_cnt = comp_v1->lcm_entry_count; - if (comp_cnt == 0) - RETURN(-EINVAL); - mirror_cnt = comp_v1->lcm_mirror_count + 1; - composite = true; - } else { - comp_cnt = 1; - mirror_cnt = 0; - composite = false; + if (append_stripe_count != 0 || append_pool != NULL) { + entry_count = 1; + mirror_count = 0; + want_composite = false; } /* realloc default comp entries if necessary */ - rc = lod_def_striping_comp_resize(lds, comp_cnt); + rc = lod_def_striping_comp_resize(lds, entry_count); if (rc < 0) RETURN(rc); - lds->lds_def_comp_cnt = comp_cnt; - lds->lds_def_striping_is_composite = composite; - lds->lds_def_mirror_cnt = mirror_cnt; + lds->lds_def_comp_cnt = entry_count; + lds->lds_def_striping_is_composite = want_composite; + lds->lds_def_mirror_cnt = mirror_count; - for (i = 0; i < comp_cnt; i++) { - struct lod_layout_component *lod_comp; - char *pool; + for (i = 0; i < entry_count; i++) { + struct lod_layout_component *llc = &lds->lds_def_comp_entries[i]; + const char *pool; - lod_comp = &lds->lds_def_comp_entries[i]; /* - * reset lod_comp values, llc_stripes is always NULL in - * the default striping template, llc_pool will be reset - * later below. + * reset llc values, llc_stripes is always NULL in the + * default striping template, llc_pool will be reset + * later below using lod_set_pool(). + * + * XXX At this point llc_pool may point to valid (!) + * kmalloced strings from previous RPCs. 
*/ - memset(lod_comp, 0, offsetof(typeof(*lod_comp), llc_pool)); - - if (composite) { - v1 = (struct lov_user_md *)((char *)comp_v1 + - comp_v1->lcm_entries[i].lcme_offset); - lod_comp->llc_extent = - comp_v1->lcm_entries[i].lcme_extent; - /* We only inherit certain flags from the layout */ - lod_comp->llc_flags = - comp_v1->lcm_entries[i].lcme_flags & + memset(llc, 0, offsetof(typeof(*llc), llc_pool)); + + if (lcm != NULL) { + v1 = (struct lov_user_md *)((char *)lcm + + lcm->lcm_entries[i].lcme_offset); + + if (want_composite) { + llc->llc_extent = lcm->lcm_entries[i].lcme_extent; + /* We only inherit certain flags from the layout */ + llc->llc_flags = lcm->lcm_entries[i].lcme_flags & LCME_TEMPLATE_FLAGS; + } } + CDEBUG(D_LAYOUT, DFID" magic = %#08x, pattern = %#x, stripe_count = %hu, stripe_size = %u, stripe_offset = %hu, append_pool = '%s', append_stripe_count = %d\n", + PFID(lu_object_fid(&lo->ldo_obj.do_lu)), + v1->lmm_magic, + v1->lmm_pattern, + v1->lmm_stripe_count, + v1->lmm_stripe_size, + v1->lmm_stripe_offset, + append_pool ?: "", + append_stripe_count); + if (!lov_pattern_supported(v1->lmm_pattern) && !(v1->lmm_pattern & LOV_PATTERN_F_RELEASED)) { lod_free_def_comp_entries(lds); RETURN(-EINVAL); } - CDEBUG(D_LAYOUT, DFID" stripe_count=%d stripe_size=%d stripe_offset=%d append_stripes=%d\n", - PFID(lu_object_fid(&lo->ldo_obj.do_lu)), - (int)v1->lmm_stripe_count, (int)v1->lmm_stripe_size, - (int)v1->lmm_stripe_offset, - ah ? 
ah->dah_append_stripes : 0); + llc->llc_stripe_count = v1->lmm_stripe_count; + llc->llc_stripe_size = v1->lmm_stripe_size; + llc->llc_stripe_offset = v1->lmm_stripe_offset; + llc->llc_pattern = v1->lmm_pattern; - if (ah && ah->dah_append_stripes) - lod_comp->llc_stripe_count = ah->dah_append_stripes; - else - lod_comp->llc_stripe_count = v1->lmm_stripe_count; - lod_comp->llc_stripe_size = v1->lmm_stripe_size; - lod_comp->llc_stripe_offset = v1->lmm_stripe_offset; - lod_comp->llc_pattern = v1->lmm_pattern; + if (append_stripe_count != 0 || append_pool != NULL) + llc->llc_pattern = LOV_PATTERN_RAID0; + + if (append_stripe_count != 0) + llc->llc_stripe_count = append_stripe_count; pool = NULL; - if (ah && ah->dah_append_pool && ah->dah_append_pool[0]) { - pool = ah->dah_append_pool; + if (append_pool != NULL) { + pool = append_pool; } else if (v1->lmm_magic == LOV_USER_MAGIC_V3) { /* XXX: sanity check here */ - v3 = (struct lov_user_md_v3 *) v1; + v3 = (struct lov_user_md_v3 *)v1; if (v3->lmm_pool_name[0] != '\0') pool = v3->lmm_pool_name; } - lod_set_def_pool(lds, i, pool); - if (v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) { + + lod_set_pool(&llc->llc_pool, pool); + + if (append_stripe_count != 0 || append_pool != NULL) { + /* Ignore specific striping for append. 
*/ + } else if (v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) { v3 = (struct lov_user_md_v3 *)v1; - rc = lod_comp_copy_ost_lists(lod_comp, v3); + rc = lod_comp_copy_ost_lists(llc, v3); if (rc) RETURN(rc); - } else if (lod_comp->llc_ostlist.op_array && - lod_comp->llc_ostlist.op_count) { - for (j = 0; j < lod_comp->llc_ostlist.op_count; j++) - lod_comp->llc_ostlist.op_array[j] = -1; - lod_comp->llc_ostlist.op_count = 0; + } else if (llc->llc_ostlist.op_array && + llc->llc_ostlist.op_count) { + for (j = 0; j < llc->llc_ostlist.op_count; j++) + llc->llc_ostlist.op_array[j] = -1; + llc->llc_ostlist.op_count = 0; } } @@ -5617,7 +5640,7 @@ static void lod_striping_from_default(struct lod_object *lo, } static inline bool lod_need_inherit_more(struct lod_object *lo, bool from_root, - char *append_pool) + const char *append_pool) { struct lod_layout_component *lod_comp; @@ -5674,8 +5697,8 @@ static void lod_ah_init(const struct lu_env *env, LASSERT(child); - if (ah->dah_append_stripes == -1) - ah->dah_append_stripes = + if (ah->dah_append_stripe_count == -1) + ah->dah_append_stripe_count = d->lod_ost_descs.ltd_lov_desc.ld_tgt_count; if (likely(parent)) { @@ -5919,7 +5942,8 @@ out: LASSERT(!lc->ldo_is_composite); lod_comp = &lc->ldo_comp_entries[0]; desc = &d->lod_ost_descs.ltd_lov_desc; - lod_adjust_stripe_info(lod_comp, desc, ah->dah_append_stripes); + lod_adjust_stripe_info(lod_comp, desc, + ah->dah_append_stripe_count); if (ah->dah_append_pool && ah->dah_append_pool[0]) lod_obj_set_pool(lc, 0, ah->dah_append_pool); } diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 4af567f..3e4d7ed 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -3201,22 +3201,10 @@ void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent, unlikely(spec != NULL && spec->sp_cr_flags & MDS_OPEN_HAS_EA)) { hint->dah_eadata = spec->u.sp_ea.eadata; hint->dah_eadata_len = spec->u.sp_ea.eadatalen; - } else { - hint->dah_eadata = NULL; - 
hint->dah_eadata_len = 0; - if (spec->sp_cr_flags & MDS_OPEN_APPEND) { - if (mdd->mdd_append_stripe_count != 0 || - mdd->mdd_append_pool[0]) - CDEBUG(D_INFO, - "using O_APPEND file striping\n"); - if (mdd->mdd_append_stripe_count) - hint->dah_append_stripes = - mdd->mdd_append_stripe_count; - if (mdd->mdd_append_pool[0]) - hint->dah_append_pool = mdd->mdd_append_pool; - } else { - hint->dah_append_stripes = 0; - } + } else if (S_ISREG(attr->la_mode) && + spec->sp_cr_flags & MDS_OPEN_APPEND) { + hint->dah_append_stripe_count = mdd->mdd_append_stripe_count; + hint->dah_append_pool = mdd->mdd_append_pool; } CDEBUG(D_INFO, DFID" eadata %p len %d\n", PFID(mdd_object_fid(child)), diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 5b034cc..5e1c8bd 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -3428,7 +3428,6 @@ static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah, LASSERT(ah); ah->dah_parent = parent; - ah->dah_mode = child_mode; if (parent != NULL && !dt_object_remote(parent)) { /* will help to find FID->ino at dt_insert("..") */ diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index 2a0e87d..b772570 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -1475,7 +1475,6 @@ static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah, LASSERT(ah); ah->dah_parent = parent; - ah->dah_mode = child_mode; if (parent != NULL && !dt_object_remote(parent)) { /* will help to find FID->ino at dt_insert("..") */ diff --git a/lustre/osp/osp_md_object.c b/lustre/osp/osp_md_object.c index 7f00ec1..6ff56f4 100644 --- a/lustre/osp/osp_md_object.c +++ b/lustre/osp/osp_md_object.c @@ -305,7 +305,6 @@ static void osp_md_ah_init(const struct lu_env *env, LASSERT(ah); ah->dah_parent = parent; - ah->dah_mode = child_mode; } /** diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 19f2212..4ddad5e 100755 --- 
a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2962,11 +2962,10 @@ test_27M() { remote_mds_nodsh && skip "remote MDS with nodsh" [[ $OSTCOUNT -lt 2 ]] && skip_env "need > 1 OST" - test_mkdir $DIR/$tdir - # Set default striping on directory local setcount=4 local stripe_opt + local mdts=$(comma_list $(mdts_nodes)) # if we run against a 2.12 server which lacks overstring support # then the connect_flag will not report overstriping, even if client @@ -2978,6 +2977,18 @@ test_27M() { else skip "server does not support overstriping" fi + + test_mkdir $DIR/$tdir + + # Validate existing append_* params and ensure restore + local pool=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_pool) + [[ "$pool" == "" ]] || error "expected append_pool == '', got '$pool'" + stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_pool=none" + + local orig_count=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_stripe_count) + ((orig_count == 1)) || error "expected append_stripe_count == 1, got $orig_count" + stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=1" + $LFS setstripe $stripe_opt $DIR/$tdir echo 1 > $DIR/$tdir/${tfile}.1 @@ -2985,11 +2996,6 @@ test_27M() { [ $count -eq $setcount ] || error "(1) stripe count $count, should be $setcount" - # Capture existing append_stripe_count setting for restore - local orig_count=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_stripe_count) - local mdts=$(comma_list $(mdts_nodes)) - stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=$orig_count" EXIT - local appendcount=$orig_count echo 1 >> $DIR/$tdir/${tfile}.2_append count=$($LFS getstripe -c $DIR/$tdir/${tfile}.2_append) @@ -3067,12 +3073,9 @@ test_27M() { error "(9) stripe count $count, should be $appendcount for append" # Now test O_APPEND striping with pools - do_nodes $mdts $LCTL set_param mdd.*.append_pool="$TESTNAME" - stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_pool='none'" EXIT - - # 
Create the pool pool_add $TESTNAME || error "pool creation failed" pool_add_targets $TESTNAME 0 1 || error "Pool add targets failed" + do_nodes $mdts $LCTL set_param mdd.*.append_pool="$TESTNAME" echo 1 >> $DIR/$tdir/${tfile}.10_append @@ -3415,6 +3418,66 @@ test_27T() { } run_test 27T "no eio on close on partial write due to enosp" +test_27U() { + local dir=$DIR/$tdir + local file=$dir/$tfile + local append_pool=${TESTNAME}-append + local normal_pool=${TESTNAME}-normal + local pool + local stripe_count + local stripe_count2 + local mdts=$(comma_list $(mdts_nodes)) + + # FIXME + # (( $MDS1_VERSION >= $(version_code 2.15.42) )) || + # skip "Need MDS version at least 2.15.42" + + # Validate existing append_* params and ensure restore + pool=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_pool) + [[ "$pool" == "" ]] || error "expected append_pool == '', got '$pool'" + stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_pool=none" + + stripe_count=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_stripe_count) + ((stripe_count == 1)) || error "expected append_stripe_count == 1, got $stripe_count" + stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=$stripe_count" + + pool_add $append_pool || error "pool creation failed" + pool_add_targets $append_pool 0 1 || error "Pool add targets failed" + + pool_add $normal_pool || error "pool creation failed" + pool_add_targets $normal_pool 0 1 || error "Pool add targets failed" + + test_mkdir $dir + $LFS setstripe -E 1M -c 1 -p $normal_pool -E 2M -c 2 -p $normal_pool -E eof -c -1 $dir + + echo XXX >> $file.1 + $LFS getstripe $file.1 + + pool=$($LFS getstripe -p $file.1) + [[ "$pool" == "$normal_pool" ]] || error "got pool '$pool', expected '$normal_pool'" + + stripe_count2=$($LFS getstripe -c $file.1) + ((stripe_count2 == stripe_count)) || + error "got stripe_count '$stripe_count2', expected '$stripe_count'" + + do_nodes $mdts $LCTL set_param mdd.*.append_pool=$append_pool + + echo
XXX >> $file.2 + $LFS getstripe $file.2 + + pool=$($LFS getstripe -p $file.2) + [[ "$pool" == "$append_pool" ]] || error "got pool '$pool', expected '$append_pool'" + + do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=2 + + echo XXX >> $file.3 + $LFS getstripe $file.3 + + stripe_count2=$($LFS getstripe -c $file.3) + ((stripe_count2 == 2)) || error "got stripe_count '$stripe_count2', expected 2" +} +run_test 27U "append pool and stripe count work with composite default layout" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091 -- 1.8.3.1