Whamcloud - gitweb
LU-11146 lustre: fix setstripe for specific osts upon dir 14/32814/16
authorWang Shilong <wshilong@ddn.com>
Wed, 11 Jul 2018 14:11:47 +0000 (22:11 +0800)
committerOleg Drokin <green@whamcloud.com>
Thu, 23 Aug 2018 07:18:13 +0000 (07:18 +0000)
LOV_USER_MAGIC_SPECIFIC handling is broken, and it
could not be used when setting striping on a directory.

1) llite doesn't handle the LOV_USER_MAGIC_SPECIFIC case
properly for dir {set,get}_stripe, and the ioctl
LL_IOC_LOV_SETSTRIPE did not allocate a buffer large
enough to copy the OST list from userspace.

2) lod_get_default_lov_striping() did not handle the
LOV_USER_MAGIC_SPECIFIC type, so newly created
files/dirs did not inherit the parent setting correctly.

3) there is no test case covering the lfs setstripe
'-o' interface, which makes it hard to figure out
when this function was broken.

Change-Id: Icc2ee60a474e5e565db12b35a9a38fde65b05bbd
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-on: https://review.whamcloud.com/32814
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/dir.c
lustre/lod/lod_internal.h
lustre/lod/lod_object.c
lustre/lod/lod_qos.c
lustre/tests/conf-sanity.sh
lustre/tests/sanity.sh
lustre/utils/liblustreapi.c

index 8c9bc1d..cb73a4d 100644 (file)
@@ -584,13 +584,28 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
                        lum_size = sizeof(struct lmv_user_md);
                        break;
                }
-                default: {
-                        CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
-                                        " %#08x != %#08x nor %#08x\n",
-                                        lump->lmm_magic, LOV_USER_MAGIC_V1,
-                                        LOV_USER_MAGIC_V3);
-                        RETURN(-EINVAL);
-                }
+               case LOV_USER_MAGIC_SPECIFIC: {
+                       struct lov_user_md_v3 *v3 =
+                                       (struct lov_user_md_v3 *)lump;
+                       if (v3->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
+                               RETURN(-EINVAL);
+                       if (lump->lmm_magic !=
+                           cpu_to_le32(LOV_USER_MAGIC_SPECIFIC)) {
+                               lustre_swab_lov_user_md_v3(v3);
+                               lustre_swab_lov_user_md_objects(v3->lmm_objects,
+                                               v3->lmm_stripe_count);
+                       }
+                       lum_size = lov_user_md_size(v3->lmm_stripe_count,
+                                                   LOV_USER_MAGIC_SPECIFIC);
+                       break;
+               }
+               default: {
+                       CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
+                                       " %#08x != %#08x nor %#08x\n",
+                                       lump->lmm_magic, LOV_USER_MAGIC_V1,
+                                       LOV_USER_MAGIC_V3);
+                       RETURN(-EINVAL);
+               }
                 }
         } else {
                 lum_size = sizeof(struct lov_user_md_v1);
@@ -742,6 +757,16 @@ int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size,
                if (LMV_USER_MAGIC != cpu_to_le32(LMV_USER_MAGIC))
                        lustre_swab_lmv_user_md((struct lmv_user_md *)lmm);
                break;
+       case LOV_USER_MAGIC_SPECIFIC: {
+               struct lov_user_md_v3 *v3 = (struct lov_user_md_v3 *)lmm;
+
+               if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
+                       lustre_swab_lov_user_md_v3(v3);
+                       lustre_swab_lov_user_md_objects(v3->lmm_objects,
+                                                       v3->lmm_stripe_count);
+                       }
+               }
+               break;
        default:
                CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
                rc = -EPROTO;
@@ -1288,34 +1313,51 @@ lmv_out_free:
        }
        case LL_IOC_LOV_SETSTRIPE_NEW:
        case LL_IOC_LOV_SETSTRIPE: {
-               struct lov_user_md_v3 lumv3;
-               struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+               struct lov_user_md_v3 *lumv3 = NULL;
+               struct lov_user_md_v1 lumv1;
+               struct lov_user_md_v1 *lumv1_ptr = &lumv1;
                struct lov_user_md_v1 __user *lumv1p =
                        (struct lov_user_md_v1 __user *)arg;
                struct lov_user_md_v3 __user *lumv3p =
                        (struct lov_user_md_v3 __user *)arg;
+               int lum_size;
 
                int set_default = 0;
 
                CLASSERT(sizeof(struct lov_user_md_v3) >
                         sizeof(struct lov_comp_md_v1));
-               CLASSERT(sizeof(lumv3) == sizeof(*lumv3p));
-               CLASSERT(sizeof(lumv3.lmm_objects[0]) ==
-                        sizeof(lumv3p->lmm_objects[0]));
+               CLASSERT(sizeof(*lumv3) == sizeof(*lumv3p));
                /* first try with v1 which is smaller than v3 */
-               if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
+               if (copy_from_user(&lumv1, lumv1p, sizeof(lumv1)))
                        RETURN(-EFAULT);
 
-               if (lumv1->lmm_magic == LOV_USER_MAGIC_V3)
-                       if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
-                               RETURN(-EFAULT);
-
                if (inode->i_sb->s_root == file_dentry(file))
                        set_default = 1;
 
-               /* in v1 and v3 cases lumv1 points to data */
-               rc = ll_dir_setstripe(inode, lumv1, set_default);
+               switch (lumv1.lmm_magic) {
+               case LOV_USER_MAGIC_V3:
+               case LOV_USER_MAGIC_SPECIFIC:
+                       lum_size = ll_lov_user_md_size(&lumv1);
+                       if (lum_size < 0)
+                               RETURN(lum_size);
+                       OBD_ALLOC(lumv3, lum_size);
+                       if (!lumv3)
+                               RETURN(-ENOMEM);
+                       if (copy_from_user(lumv3, lumv3p, lum_size))
+                               GOTO(out, rc = -EFAULT);
+                       lumv1_ptr = (struct lov_user_md_v1 *)lumv3;
+                       break;
+               case LOV_USER_MAGIC_V1:
+                       break;
+               default:
+                       GOTO(out, rc = -ENOTSUPP);
+               }
 
+               /* in v1 and v3 cases lumv1 points to data */
+               rc = ll_dir_setstripe(inode, lumv1_ptr, set_default);
+out:
+               if (lumv3)
+                       OBD_FREE(lumv3, lum_size);
                RETURN(rc);
        }
        case LL_IOC_LMV_GETSTRIPE: {
index f74d951..b478332 100644 (file)
@@ -757,6 +757,8 @@ void lod_striping_free(const struct lu_env *env, struct lod_object *lo);
 int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo,
                            struct thandle *th,
                            struct lod_obj_stripe_cb_data *data);
+int lod_comp_copy_ost_lists(struct lod_layout_component *lod_comp,
+                           struct lov_user_md_v3 *v3);
 
 /* lod_sub_object.c */
 struct thandle *lod_sub_get_thandle(const struct lu_env *env,
index c4a405a..91b901b 100644 (file)
@@ -3175,6 +3175,7 @@ static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
        lum = buf->lb_buf;
 
        switch (lum->lmm_magic) {
+       case LOV_USER_MAGIC_SPECIFIC:
        case LOV_USER_MAGIC_V3:
                v3 = buf->lb_buf;
                if (v3->lmm_pool_name[0] != '\0')
@@ -4005,6 +4006,49 @@ static inline int lod_object_will_be_striped(int is_reg, const struct lu_fid *fi
        return (is_reg && fid_seq(fid) != FID_SEQ_LOCAL_FILE);
 }
 
+/**
+ * Copy the OST index list from a user-provided V3 layout into \a lod_comp.
+ * NOTE(review): reads lmm_objects[0] when the stripe offset is default;
+ * assumes the caller has verified lmm_stripe_count > 0 -- TODO confirm.
+ *
+ * \param[in,out] lod_comp     component whose llc_ostlist is (re)filled
+ * \param[in,out] v3           LOV EA V3 user data (stripe offset may be set)
+ * \retval             0 on success, -ENOMEM if the array allocation fails
+ */
+int lod_comp_copy_ost_lists(struct lod_layout_component *lod_comp,
+                           struct lov_user_md_v3 *v3)
+{
+       int j;
+
+       ENTRY;
+
+       if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT)
+               v3->lmm_stripe_offset = v3->lmm_objects[0].l_ost_idx;
+
+       if (lod_comp->llc_ostlist.op_array) {
+               if (lod_comp->llc_ostlist.op_count ==
+                                       v3->lmm_stripe_count)
+                       goto skip; /* same count: reuse existing array */
+               OBD_FREE(lod_comp->llc_ostlist.op_array,
+                        lod_comp->llc_ostlist.op_size);
+       }
+
+       /* size and allocate the index array for the new stripe count */
+       lod_comp->llc_ostlist.op_count = v3->lmm_stripe_count;
+       lod_comp->llc_ostlist.op_size = v3->lmm_stripe_count * sizeof(__u32);
+       OBD_ALLOC(lod_comp->llc_ostlist.op_array,
+                 lod_comp->llc_ostlist.op_size);
+       if (!lod_comp->llc_ostlist.op_array)
+               RETURN(-ENOMEM); /* op_count/op_size stay set on this path */
+skip:
+       for (j = 0; j < v3->lmm_stripe_count; j++) {
+               lod_comp->llc_ostlist.op_array[j] =
+                       v3->lmm_objects[j].l_ost_idx;
+       }
+
+       RETURN(0);
+}
+
 
 /**
  * Get default striping.
@@ -4045,13 +4089,19 @@ static int lod_get_default_lov_striping(const struct lu_env *env,
        } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
                v3 = (struct lov_user_md_v3 *)v1;
                lustre_swab_lov_user_md_v3(v3);
+       } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_SPECIFIC)) {
+               v3 = (struct lov_user_md_v3 *)v1;
+               lustre_swab_lov_user_md_v3(v3);
+               lustre_swab_lov_user_md_objects(v3->lmm_objects,
+                                               v3->lmm_stripe_count);
        } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_COMP_V1)) {
                comp_v1 = (struct lov_comp_md_v1 *)v1;
                lustre_swab_lov_comp_md_v1(comp_v1);
        }
 
        if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1 &&
-           v1->lmm_magic != LOV_MAGIC_COMP_V1)
+           v1->lmm_magic != LOV_MAGIC_COMP_V1 &&
+           v1->lmm_magic != LOV_USER_MAGIC_SPECIFIC)
                RETURN(-ENOTSUPP);
 
        if (v1->lmm_magic == LOV_MAGIC_COMP_V1) {
@@ -4122,6 +4172,12 @@ static int lod_get_default_lov_striping(const struct lu_env *env,
                                pool = v3->lmm_pool_name;
                }
                lod_set_def_pool(lds, i, pool);
+               if (v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+                       v3 = (struct lov_user_md_v3 *)v1;
+                       rc = lod_comp_copy_ost_lists(lod_comp, v3);
+                       if (rc)
+                               RETURN(rc);
+               }
        }
 
        lds->lds_def_striping_set = 1;
@@ -4240,6 +4296,17 @@ static void lod_striping_from_default(struct lod_object *lo,
                                lod_obj_set_pool(lo, i, def_comp->llc_pool);
                        }
 
+                       /* copy ost list */
+                       if (def_comp->llc_ostlist.op_array) {
+                               OBD_ALLOC(obj_comp->llc_ostlist.op_array,
+                                         obj_comp->llc_ostlist.op_size);
+                               if (!obj_comp->llc_ostlist.op_array)
+                                       return;
+                               memcpy(obj_comp->llc_ostlist.op_array,
+                                      def_comp->llc_ostlist.op_array,
+                                      obj_comp->llc_ostlist.op_size);
+                       }
+
                        /*
                         * Don't initialize these fields for plain layout
                         * (v1/v3) here, they are inherited in the order of
index c1c7e31..09a0630 100644 (file)
@@ -1219,6 +1219,10 @@ static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
        if (rc < 0)
                RETURN(rc);
 
+       if (lod_comp->llc_stripe_offset == LOV_OFFSET_DEFAULT)
+               lod_comp->llc_stripe_offset =
+                               lod_comp->llc_ostlist.op_array[0];
+
        for (i = 0; i < lod_comp->llc_stripe_count; i++) {
                if (lod_comp->llc_ostlist.op_array[i] ==
                    lod_comp->llc_stripe_offset) {
@@ -2041,30 +2045,14 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo,
                pool_name = NULL;
                if (v1->lmm_magic == LOV_USER_MAGIC_V3 ||
                    v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
-                       int j;
-
                        v3 = (struct lov_user_md_v3 *)v1;
                        if (v3->lmm_pool_name[0] != '\0')
                                pool_name = v3->lmm_pool_name;
 
                        if (v3->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
-                               if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT)
-                                       v3->lmm_stripe_offset =
-                                               v3->lmm_objects[0].l_ost_idx;
-
-                               /* copy ost list from lmm */
-                               lod_comp->llc_ostlist.op_count =
-                                       v3->lmm_stripe_count;
-                               lod_comp->llc_ostlist.op_size =
-                                       v3->lmm_stripe_count * sizeof(__u32);
-                               OBD_ALLOC(lod_comp->llc_ostlist.op_array,
-                                         lod_comp->llc_ostlist.op_size);
-                               if (!lod_comp->llc_ostlist.op_array)
-                                       GOTO(free_comp, rc = -ENOMEM);
-
-                               for (j = 0; j < v3->lmm_stripe_count; j++)
-                                       lod_comp->llc_ostlist.op_array[j] =
-                                               v3->lmm_objects[j].l_ost_idx;
+                               rc = lod_comp_copy_ost_lists(lod_comp, v3);
+                               if (rc)
+                                       GOTO(free_comp, rc);
                        }
                }
 
index 6c05b97..d4ada59 100644 (file)
@@ -5895,17 +5895,21 @@ test_82a() { # LU-4665
        echo -e "\n$cmd"
        eval $cmd && error "index $start_ost_idx should be in $ost_indices"
 
-       # 5. Specifying OST indices for directory should fail with ENOSUPP.
+       # 5. Specifying OST indices for directory should succeed.
        local dir=$DIR/$tdir/$tdir
        mkdir $dir || error "mkdir $dir failed"
        cmd="$SETSTRIPE -o $ost_indices $dir"
-       echo -e "\n$cmd"
-       eval $cmd && error "$cmd should fail, specifying OST indices" \
-                          "for directory is not supported"
+       if [[ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.11.53) &&
+          $(lustre_version_code client -gt $(version_code 2.11.53)) ]]; then
+               echo -e "\n$cmd"
+               eval $cmd || error "unable to specify OST indices on directory"
+       else
+               echo "need MDS+client version at least 2.11.53"
+       fi
 
        restore_ostindex
 }
-run_test 82a "specify OSTs for file (succeed) or directory (fail)"
+run_test 82a "specify OSTs for file (succeed) or directory (succeed)"
 
 cleanup_82b() {
        trap 0
index 9a98a85..e63d910 100755 (executable)
@@ -2297,6 +2297,25 @@ test_27G() { #LU-10629
 }
 run_test 27G "Clear OST pool from stripe"
 
+test_27H() {
+       [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.11.53) ]] &&
+               skip "Need MDS version newer than 2.11.53"
+       [[ $OSTCOUNT -lt 3 ]] && skip_env "needs >= 3 OSTs" # uses OST index 2
+       test_mkdir $DIR/$tdir
+       $LFS setstripe -o 0 -o 2 $DIR/$tdir || error "setstripe failed"
+       touch $DIR/$tdir/$tfile # new file created under the striped directory
+       $LFS getstripe -c $DIR/$tdir/$tfile # show the stripe count
+       [ $($LFS getstripe -c $DIR/$tdir/$tfile) -eq 2 ] ||
+               error "two-stripe file doesn't have two stripes"
+       # write 16k of zeros, then check both stripes sit on OST index 0 or 2
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=4k count=4 || error "dd failed"
+       $LFS getstripe -y $DIR/$tdir/$tfile
+       (( $($LFS getstripe -y $DIR/$tdir/$tfile |
+            egrep -c "l_ost_idx: [02]$") == "2" )) ||
+               error "expected l_ost_idx: [02]$ not matched"
+}
+run_test 27H "Set specific OSTs stripe"
+
 # createtest also checks that device nodes are created and
 # then visible correctly (#2091)
 test_28() { # bug 2091
index 85057b8..86750a8 100644 (file)
@@ -3480,6 +3480,7 @@ static void llapi_lov_dump_user_lmm(struct find_param *param, char *path,
        switch (magic) {
        case LOV_USER_MAGIC_V1:
        case LOV_USER_MAGIC_V3:
+       case LOV_USER_MAGIC_SPECIFIC:
                lov_dump_plain_user_lmm(param, path, flags);
                break;
        case LMV_MAGIC_V1: