From: Wang Shilong Date: Wed, 11 Jul 2018 14:11:47 +0000 (+0800) Subject: LU-11146 lustre: fix setstripe for specific osts upon dir X-Git-Tag: 2.11.55~60 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=083d62ee6de5ac6cee95c1d2f86b62b75034093b LU-11146 lustre: fix setstripe for specific osts upon dir The LOV_USER_MAGIC_SPECIFIC functionality is broken, and it was not available for setting on a directory. 1) llite does not handle the LOV_USER_MAGIC_SPECIFIC case properly in dir {set,get}_stripe, and the LL_IOC_LOV_SETSTRIPE ioctl did not allocate a large enough buffer to copy the OST list from userspace. 2) lod_get_default_lov_striping() did not handle the LOV_USER_MAGIC_SPECIFIC type, so newly created files/dirs did not inherit the parent setting correctly. 3) There was no test case covering the lfs setstripe '-o' interface, which made it hard to figure out when this functionality was broken. Change-Id: Icc2ee60a474e5e565db12b35a9a38fde65b05bbd Signed-off-by: Wang Shilong Reviewed-on: https://review.whamcloud.com/32814 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Bobi Jam Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 8c9bc1d..cb73a4d 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -584,13 +584,28 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, lum_size = sizeof(struct lmv_user_md); break; } - default: { - CDEBUG(D_IOCTL, "bad userland LOV MAGIC:" - " %#08x != %#08x nor %#08x\n", - lump->lmm_magic, LOV_USER_MAGIC_V1, - LOV_USER_MAGIC_V3); - RETURN(-EINVAL); - } + case LOV_USER_MAGIC_SPECIFIC: { + struct lov_user_md_v3 *v3 = + (struct lov_user_md_v3 *)lump; + if (v3->lmm_stripe_count > LOV_MAX_STRIPE_COUNT) + RETURN(-EINVAL); + if (lump->lmm_magic != + cpu_to_le32(LOV_USER_MAGIC_SPECIFIC)) { + lustre_swab_lov_user_md_v3(v3); + lustre_swab_lov_user_md_objects(v3->lmm_objects, + v3->lmm_stripe_count); + } + lum_size = lov_user_md_size(v3->lmm_stripe_count, 
LOV_USER_MAGIC_SPECIFIC); + break; + } + default: { + CDEBUG(D_IOCTL, "bad userland LOV MAGIC:" + " %#08x != %#08x nor %#08x\n", + lump->lmm_magic, LOV_USER_MAGIC_V1, + LOV_USER_MAGIC_V3); + RETURN(-EINVAL); + } } } else { lum_size = sizeof(struct lov_user_md_v1); @@ -742,6 +757,16 @@ int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, if (LMV_USER_MAGIC != cpu_to_le32(LMV_USER_MAGIC)) lustre_swab_lmv_user_md((struct lmv_user_md *)lmm); break; + case LOV_USER_MAGIC_SPECIFIC: { + struct lov_user_md_v3 *v3 = (struct lov_user_md_v3 *)lmm; + + if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) { + lustre_swab_lov_user_md_v3(v3); + lustre_swab_lov_user_md_objects(v3->lmm_objects, + v3->lmm_stripe_count); + } + } + break; default: CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic); rc = -EPROTO; @@ -1288,34 +1313,51 @@ lmv_out_free: } case LL_IOC_LOV_SETSTRIPE_NEW: case LL_IOC_LOV_SETSTRIPE: { - struct lov_user_md_v3 lumv3; - struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3; + struct lov_user_md_v3 *lumv3 = NULL; + struct lov_user_md_v1 lumv1; + struct lov_user_md_v1 *lumv1_ptr = &lumv1; struct lov_user_md_v1 __user *lumv1p = (struct lov_user_md_v1 __user *)arg; struct lov_user_md_v3 __user *lumv3p = (struct lov_user_md_v3 __user *)arg; + int lum_size; int set_default = 0; CLASSERT(sizeof(struct lov_user_md_v3) > sizeof(struct lov_comp_md_v1)); - CLASSERT(sizeof(lumv3) == sizeof(*lumv3p)); - CLASSERT(sizeof(lumv3.lmm_objects[0]) == - sizeof(lumv3p->lmm_objects[0])); + CLASSERT(sizeof(*lumv3) == sizeof(*lumv3p)); /* first try with v1 which is smaller than v3 */ - if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1))) + if (copy_from_user(&lumv1, lumv1p, sizeof(lumv1))) RETURN(-EFAULT); - if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) - if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3))) - RETURN(-EFAULT); - if (inode->i_sb->s_root == file_dentry(file)) set_default = 1; - /* in v1 and v3 cases lumv1 points to data */ - rc = ll_dir_setstripe(inode, 
lumv1, set_default); + switch (lumv1.lmm_magic) { + case LOV_USER_MAGIC_V3: + case LOV_USER_MAGIC_SPECIFIC: + lum_size = ll_lov_user_md_size(&lumv1); + if (lum_size < 0) + RETURN(lum_size); + OBD_ALLOC(lumv3, lum_size); + if (!lumv3) + RETURN(-ENOMEM); + if (copy_from_user(lumv3, lumv3p, lum_size)) + GOTO(out, rc = -EFAULT); + lumv1_ptr = (struct lov_user_md_v1 *)lumv3; + break; + case LOV_USER_MAGIC_V1: + break; + default: + GOTO(out, rc = -ENOTSUPP); + } + /* in v1 and v3 cases lumv1 points to data */ + rc = ll_dir_setstripe(inode, lumv1_ptr, set_default); +out: + if (lumv3) + OBD_FREE(lumv3, lum_size); RETURN(rc); } case LL_IOC_LMV_GETSTRIPE: { diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index f74d951..b478332 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -757,6 +757,8 @@ void lod_striping_free(const struct lu_env *env, struct lod_object *lo); int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo, struct thandle *th, struct lod_obj_stripe_cb_data *data); +int lod_comp_copy_ost_lists(struct lod_layout_component *lod_comp, + struct lov_user_md_v3 *v3); /* lod_sub_object.c */ struct thandle *lod_sub_get_thandle(const struct lu_env *env, diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index c4a405a..91b901b 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -3175,6 +3175,7 @@ static int lod_xattr_set_lov_on_dir(const struct lu_env *env, lum = buf->lb_buf; switch (lum->lmm_magic) { + case LOV_USER_MAGIC_SPECIFIC: case LOV_USER_MAGIC_V3: v3 = buf->lb_buf; if (v3->lmm_pool_name[0] != '\0') @@ -4005,6 +4006,49 @@ static inline int lod_object_will_be_striped(int is_reg, const struct lu_fid *fi return (is_reg && fid_seq(fid) != FID_SEQ_LOCAL_FILE); } +/** + * Copy OST list from layout provided by user. 
+ * + * \param[in] lod_comp layout_component to be filled + * \param[in] v3 LOV EA V3 user data + * + * \retval 0 on success + * \retval negative if failed + */ +int lod_comp_copy_ost_lists(struct lod_layout_component *lod_comp, + struct lov_user_md_v3 *v3) +{ + int j; + + ENTRY; + + if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT) + v3->lmm_stripe_offset = v3->lmm_objects[0].l_ost_idx; + + if (lod_comp->llc_ostlist.op_array) { + if (lod_comp->llc_ostlist.op_count == + v3->lmm_stripe_count) + goto skip; + OBD_FREE(lod_comp->llc_ostlist.op_array, + lod_comp->llc_ostlist.op_size); + } + + /* copy ost list from lmm */ + lod_comp->llc_ostlist.op_count = v3->lmm_stripe_count; + lod_comp->llc_ostlist.op_size = v3->lmm_stripe_count * sizeof(__u32); + OBD_ALLOC(lod_comp->llc_ostlist.op_array, + lod_comp->llc_ostlist.op_size); + if (!lod_comp->llc_ostlist.op_array) + RETURN(-ENOMEM); +skip: + for (j = 0; j < v3->lmm_stripe_count; j++) { + lod_comp->llc_ostlist.op_array[j] = + v3->lmm_objects[j].l_ost_idx; + } + + RETURN(0); +} + /** * Get default striping. 
@@ -4045,13 +4089,19 @@ static int lod_get_default_lov_striping(const struct lu_env *env, } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3)) { v3 = (struct lov_user_md_v3 *)v1; lustre_swab_lov_user_md_v3(v3); + } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_SPECIFIC)) { + v3 = (struct lov_user_md_v3 *)v1; + lustre_swab_lov_user_md_v3(v3); + lustre_swab_lov_user_md_objects(v3->lmm_objects, + v3->lmm_stripe_count); } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_COMP_V1)) { comp_v1 = (struct lov_comp_md_v1 *)v1; lustre_swab_lov_comp_md_v1(comp_v1); } if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1 && - v1->lmm_magic != LOV_MAGIC_COMP_V1) + v1->lmm_magic != LOV_MAGIC_COMP_V1 && + v1->lmm_magic != LOV_USER_MAGIC_SPECIFIC) RETURN(-ENOTSUPP); if (v1->lmm_magic == LOV_MAGIC_COMP_V1) { @@ -4122,6 +4172,12 @@ static int lod_get_default_lov_striping(const struct lu_env *env, pool = v3->lmm_pool_name; } lod_set_def_pool(lds, i, pool); + if (v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) { + v3 = (struct lov_user_md_v3 *)v1; + rc = lod_comp_copy_ost_lists(lod_comp, v3); + if (rc) + RETURN(rc); + } } lds->lds_def_striping_set = 1; @@ -4240,6 +4296,17 @@ static void lod_striping_from_default(struct lod_object *lo, lod_obj_set_pool(lo, i, def_comp->llc_pool); } + /* copy ost list */ + if (def_comp->llc_ostlist.op_array) { + OBD_ALLOC(obj_comp->llc_ostlist.op_array, + obj_comp->llc_ostlist.op_size); + if (!obj_comp->llc_ostlist.op_array) + return; + memcpy(obj_comp->llc_ostlist.op_array, + def_comp->llc_ostlist.op_array, + obj_comp->llc_ostlist.op_size); + } + /* * Don't initialize these fields for plain layout * (v1/v3) here, they are inherited in the order of diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index c1c7e31..09a0630 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -1219,6 +1219,10 @@ static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo, if (rc < 0) RETURN(rc); + if 
(lod_comp->llc_stripe_offset == LOV_OFFSET_DEFAULT) + lod_comp->llc_stripe_offset = + lod_comp->llc_ostlist.op_array[0]; + for (i = 0; i < lod_comp->llc_stripe_count; i++) { if (lod_comp->llc_ostlist.op_array[i] == lod_comp->llc_stripe_offset) { @@ -2041,30 +2045,14 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo, pool_name = NULL; if (v1->lmm_magic == LOV_USER_MAGIC_V3 || v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) { - int j; - v3 = (struct lov_user_md_v3 *)v1; if (v3->lmm_pool_name[0] != '\0') pool_name = v3->lmm_pool_name; if (v3->lmm_magic == LOV_USER_MAGIC_SPECIFIC) { - if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT) - v3->lmm_stripe_offset = - v3->lmm_objects[0].l_ost_idx; - - /* copy ost list from lmm */ - lod_comp->llc_ostlist.op_count = - v3->lmm_stripe_count; - lod_comp->llc_ostlist.op_size = - v3->lmm_stripe_count * sizeof(__u32); - OBD_ALLOC(lod_comp->llc_ostlist.op_array, - lod_comp->llc_ostlist.op_size); - if (!lod_comp->llc_ostlist.op_array) - GOTO(free_comp, rc = -ENOMEM); - - for (j = 0; j < v3->lmm_stripe_count; j++) - lod_comp->llc_ostlist.op_array[j] = - v3->lmm_objects[j].l_ost_idx; + rc = lod_comp_copy_ost_lists(lod_comp, v3); + if (rc) + GOTO(free_comp, rc); } } diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 6c05b97..d4ada59 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -5895,17 +5895,21 @@ test_82a() { # LU-4665 echo -e "\n$cmd" eval $cmd && error "index $start_ost_idx should be in $ost_indices" - # 5. Specifying OST indices for directory should fail with ENOSUPP. + # 5. Specifying OST indices for directory should succeed. 
local dir=$DIR/$tdir/$tdir mkdir $dir || error "mkdir $dir failed" cmd="$SETSTRIPE -o $ost_indices $dir" - echo -e "\n$cmd" - eval $cmd && error "$cmd should fail, specifying OST indices" \ - "for directory is not supported" + if [[ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.11.53) && + $(lustre_version_code client -gt $(version_code 2.11.53)) ]]; then + echo -e "\n$cmd" + eval $cmd || error "unable to specify OST indices on directory" + else + echo "need MDS+client version at least 2.11.53" + fi restore_ostindex } -run_test 82a "specify OSTs for file (succeed) or directory (fail)" +run_test 82a "specify OSTs for file (succeed) or directory (succeed)" cleanup_82b() { trap 0 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 9a98a85..e63d910 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2297,6 +2297,25 @@ test_27G() { #LU-10629 } run_test 27G "Clear OST pool from stripe" +test_27H() { + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.11.53) ]] && + skip "Need MDS version newer than 2.11.53" + [[ $OSTCOUNT -lt 3 ]] && skip_env "needs >= 3 OSTs" + test_mkdir $DIR/$tdir + $LFS setstripe -o 0 -o 2 $DIR/$tdir || error "setstripe failed" + touch $DIR/$tdir/$tfile + $LFS getstripe -c $DIR/$tdir/$tfile + [ $($LFS getstripe -c $DIR/$tdir/$tfile) -eq 2 ] || + error "two-stripe file doesn't have two stripes" + + dd if=/dev/zero of=$DIR/$tdir/$tfile bs=4k count=4 || error "dd failed" + $LFS getstripe -y $DIR/$tdir/$tfile + (( $($LFS getstripe -y $DIR/$tdir/$tfile | + egrep -c "l_ost_idx: [02]$") == "2" )) || + error "expected l_ost_idx: [02]$ not matched" +} +run_test 27H "Set specific OSTs stripe" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091 diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 85057b8..86750a8 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -3480,6 +3480,7 @@ static void 
llapi_lov_dump_user_lmm(struct find_param *param, char *path, switch (magic) { case LOV_USER_MAGIC_V1: case LOV_USER_MAGIC_V3: + case LOV_USER_MAGIC_SPECIFIC: lov_dump_plain_user_lmm(param, path, flags); break; case LMV_MAGIC_V1: