From: Jinshan Xiong Date: Fri, 15 Sep 2017 20:01:58 +0000 (+0000) Subject: LU-9771 flr: lfs setstripe to create a new mirror X-Git-Tag: 2.10.56~9^2^2~28 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b879bbc27db53ecc899f6007bf4fe0d993615def LU-9771 flr: lfs setstripe to create a new mirror To create a mirrored file: 1. Create a component file: lfs setstripe -E -1 /mnt/lustre/tf 2. Add a synced mirror, i.e. create a mirror with the specified layout: lfs setstripe --component-add [--mirror[=victim_file]] [setstripe options] If victim_file exists, its layout is split from that file and used as the mirror; otherwise, a new mirror is created with the stripe options given in 'setstripe options'. 3. [todo] Support creating a mirrored file directly with: lfs setstripe --mirror [setstripe options] --mirror [options] (the flag 'LCM_USER_FL_MIRROR' is reserved for this purpose). Test-Parameters: testlist=sanity-flr Signed-off-by: Jinshan Xiong Change-Id: I470feeb1a77554bd2c990e94d8538fd3d03d7b3b Reviewed-on: https://review.whamcloud.com/29083 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Fan Yong --- diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index af57228..50bad11 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -907,7 +907,8 @@ struct lu_rdpg { enum lu_xattr_flags { LU_XATTR_REPLACE = (1 << 0), - LU_XATTR_CREATE = (1 << 1) + LU_XATTR_CREATE = (1 << 1), + LU_XATTR_MERGE = (1 << 2), }; /** @} helpers */ diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index 79f2f17..1ae9361 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -148,13 +148,14 @@ int llapi_file_lookup(int dirfd, const char *name); #define VERBOSE_COMP_ID 0x2000 #define VERBOSE_DFID 0x4000 #define VERBOSE_HASH_TYPE 0x8000 +#define VERBOSE_MIRROR_COUNT 0x10000 #define VERBOSE_DEFAULT (VERBOSE_COUNT | VERBOSE_SIZE | \ VERBOSE_OFFSET | 
VERBOSE_POOL | \ VERBOSE_OBJID | VERBOSE_GENERATION | \ VERBOSE_LAYOUT | VERBOSE_HASH_TYPE | \ VERBOSE_COMP_COUNT | VERBOSE_COMP_FLAGS | \ VERBOSE_COMP_START | VERBOSE_COMP_END | \ - VERBOSE_COMP_ID) + VERBOSE_COMP_ID | VERBOSE_MIRROR_COUNT) struct find_param { unsigned int fp_max_depth; @@ -722,6 +723,11 @@ int llapi_layout_file_create(const char *path, int open_flags, int mode, const struct llapi_layout *layout); /** + * Set flags to the header of component layout. + */ +int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags); + +/** * Fetch the start and end offset of the current layout component. */ int llapi_layout_comp_extent_get(const struct llapi_layout *layout, diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 9635941..eaf53bd 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -1875,8 +1875,12 @@ enum mds_op_bias { MDS_HSM_RELEASE = 1 << 12, MDS_RENAME_MIGRATE = 1 << 13, MDS_CLOSE_LAYOUT_SWAP = 1 << 14, + MDS_CLOSE_LAYOUT_MERGE = 1 << 15, }; +#define MDS_CLOSE_INTENT (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | \ + MDS_CLOSE_LAYOUT_MERGE) + /* instance of mdt_reint_rec */ struct mdt_rec_create { __u32 cr_opcode; diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index f4adad5..cf3c908 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -522,6 +522,11 @@ static inline bool lu_extent_is_overlapped(struct lu_extent *e1, return e1->e_start < e2->e_end && e2->e_start < e1->e_end; } +static inline bool lu_extent_is_whole(struct lu_extent *e) +{ + return e->e_start == 0 && e->e_end == LUSTRE_EOF; +} + enum lov_comp_md_entry_flags { LCME_FL_PRIMARY = 0x00000001, /* Not used */ LCME_FL_STALE = 0x00000002, /* Not used */ @@ -557,7 +562,33 @@ struct lov_comp_md_entry_v1 { __u64 lcme_padding[2]; } 
__attribute__((packed)); -enum lov_comp_md_flags; +#define SEQ_ID_MAX 0x0000FFFF +#define SEQ_ID_MASK SEQ_ID_MAX +/* bit 30:16 of lcme_id is used to store mirror id */ +#define MIRROR_ID_MASK 0x7FFF0000 +#define MIRROR_ID_SHIFT 16 + +static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid) +{ + return ((mirror_id << MIRROR_ID_SHIFT) & MIRROR_ID_MASK) | seqid; +} + +static inline __u16 mirror_id_of(__u32 id) +{ + return (id & MIRROR_ID_MASK) >> MIRROR_ID_SHIFT; +} + +/** + * on-disk data for lcm_flags. Valid if lcm_magic is LOV_MAGIC_COMP_V1. + */ +enum lov_comp_md_flags { + /* the least 2 bits are used by FLR to record file state */ + LCM_FL_NOT_FLR = 0, + LCM_FL_RDONLY = 1, + LCM_FL_WRITE_PENDING = 2, + LCM_FL_SYNC_PENDING = 3, + LCM_FL_FLR_MASK = 0x3, +}; struct lov_comp_md_v1 { __u32 lcm_magic; /* LOV_USER_MAGIC_COMP_V1 */ @@ -565,11 +596,19 @@ struct lov_comp_md_v1 { __u32 lcm_layout_gen; __u16 lcm_flags; __u16 lcm_entry_count; - __u64 lcm_padding1; + /* lcm_mirror_count stores the number of actual mirrors minus 1, + * so that non-flr files will have value 0 meaning 1 mirror. */ + __u16 lcm_mirror_count; + __u16 lcm_padding1[3]; __u64 lcm_padding2; struct lov_comp_md_entry_v1 lcm_entries[0]; } __attribute__((packed)); +/* + * Maximum number of mirrors Lustre can support. 
+ */ +#define LUSTRE_MIRROR_COUNT_MAX 16 + static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic) { if (stripes == (__u16)-1) @@ -857,6 +896,8 @@ struct if_quotactl { #define SWAP_LAYOUTS_KEEP_MTIME (1 << 2) #define SWAP_LAYOUTS_KEEP_ATIME (1 << 3) #define SWAP_LAYOUTS_CLOSE (1 << 4) +#define MERGE_LAYOUTS_CLOSE (1 << 5) +#define INTENT_LAYOUTS_CLOSE (SWAP_LAYOUTS_CLOSE | MERGE_LAYOUTS_CLOSE) /* Swap XATTR_NAME_HSM as well, only on the MDT so far */ #define SWAP_LAYOUTS_MDS_HSM (1 << 31) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index f2a7f1d..701b647 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -144,9 +144,10 @@ static int ll_close_inode_openhandle(struct inode *inode, ll_prepare_close(inode, op_data, och); switch (bias) { + case MDS_CLOSE_LAYOUT_MERGE: case MDS_CLOSE_LAYOUT_SWAP: LASSERT(data != NULL); - op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP; + op_data->op_bias |= bias; op_data->op_data_version = 0; op_data->op_lease_handle = och->och_lease_handle; op_data->op_fid2 = *ll_inode2fid(data); @@ -170,8 +171,7 @@ static int ll_close_inode_openhandle(struct inode *inode, CERROR("%s: inode "DFID" mdc close failed: rc = %d\n", md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc); - if (rc == 0 && - op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) { + if (rc == 0 && op_data->op_bias & bias) { struct mdt_body *body; body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); @@ -914,10 +914,12 @@ static int ll_check_swap_layouts_validity(struct inode *inode1, } static int ll_swap_layouts_close(struct obd_client_handle *och, - struct inode *inode, struct inode *inode2) + struct inode *inode, struct inode *inode2, + int intent) { const struct lu_fid *fid1 = ll_inode2fid(inode); const struct lu_fid *fid2; + enum mds_op_bias bias; int rc; ENTRY; @@ -935,11 +937,21 @@ static int ll_swap_layouts_close(struct obd_client_handle *och, if (rc == 0) GOTO(out_free_och, rc = -EINVAL); - /* Close the file and swap layouts 
between inode & inode2. + switch (intent) { + case SWAP_LAYOUTS_CLOSE: + bias = MDS_CLOSE_LAYOUT_SWAP; + break; + case MERGE_LAYOUTS_CLOSE: + bias = MDS_CLOSE_LAYOUT_MERGE; + break; + default: + GOTO(out_free_och, rc = -EOPNOTSUPP); + } + + /* Close the file and {swap,merge} layouts between inode & inode2. * NB: lease lock handle is released in mdc_close_layout_swap_pack() * because we still need it to pack l_remote_handle to MDT. */ - rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP, - inode2); + rc = ll_close_inode_openhandle(inode, och, bias, inode2); och = NULL; /* freed in ll_close_inode_openhandle() */ @@ -2783,6 +2795,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case LL_IOC_LOV_SWAP_LAYOUTS: { struct file *file2; struct lustre_swap_layouts lsl; + __u64 intent; if (copy_from_user(&lsl, (char __user *)arg, sizeof(struct lustre_swap_layouts))) @@ -2799,14 +2812,12 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if ((file2->f_flags & O_ACCMODE) == O_RDONLY) GOTO(out, rc = -EPERM); - if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) { + intent = lsl.sl_flags & INTENT_LAYOUTS_CLOSE; + if (intent) { struct inode *inode2; struct ll_inode_info *lli; struct obd_client_handle *och = NULL; - if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) - GOTO(out, rc = -EINVAL); - lli = ll_i2info(inode); mutex_lock(&lli->lli_och_mutex); if (fd->fd_lease_och != NULL) { @@ -2817,7 +2828,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (och == NULL) GOTO(out, rc = -ENOLCK); inode2 = file_inode(file2); - rc = ll_swap_layouts_close(och, inode, inode2); + rc = ll_swap_layouts_close(och, inode, inode2, intent); } else { rc = ll_swap_layouts(file, file2, &lsl); } diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index b0dfdbf..fe7d21b 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -274,7 +274,9 @@ struct lod_object { /* Layout component count for a regular file. 
* It equals to 1 for non-composite layout. */ __u16 ldo_comp_cnt; + __u16 ldo_mirror_count; __u32 ldo_is_composite:1, + ldo_flr_state:2, ldo_comp_cached:1; }; /* directory stripe (LMV) */ @@ -598,8 +600,8 @@ int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo, const struct lu_buf *buf); int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo, struct lov_ost_data_v1 *objs, int index); -int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, - bool is_from_disk, __u64 start); +int lod_verify_striping(struct lod_device *d, struct lod_object *lo, + const struct lu_buf *buf, bool is_from_disk); int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo, struct lov_mds_md *lmm, int *lmm_size, bool is_dir); int lod_ea_store_resize(struct lod_thread_info *info, size_t size); diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index f5df58d..1a3c9df 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -943,6 +943,8 @@ int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo, lcm = (struct lov_comp_md_v1 *)lmm; lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1); lcm->lcm_entry_count = cpu_to_le16(comp_cnt); + lcm->lcm_mirror_count = cpu_to_le16(lo->ldo_mirror_count); + lcm->lcm_flags = cpu_to_le16(lo->ldo_flr_state); offset = sizeof(*lcm) + sizeof(*lcme) * comp_cnt; LASSERT(offset % sizeof(__u64) == 0); @@ -1221,6 +1223,9 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo, GOTO(out, rc = -EINVAL); lo->ldo_layout_gen = le32_to_cpu(comp_v1->lcm_layout_gen); lo->ldo_is_composite = 1; + lo->ldo_flr_state = le16_to_cpu(comp_v1->lcm_flags) & + LCM_FL_FLR_MASK; + lo->ldo_mirror_count = le16_to_cpu(comp_v1->lcm_mirror_count); } else { comp_cnt = 1; lo->ldo_layout_gen = le16_to_cpu(lmm->lmm_layout_gen); @@ -1268,9 +1273,10 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo, if (magic == LOV_MAGIC_V3) { struct lov_mds_md_v3 *v3 = (struct 
lov_mds_md_v3 *)lmm; + lod_set_pool(&lod_comp->llc_pool, v3->lmm_pool_name); objs = &v3->lmm_objects[0]; - /* no need to set pool, which is used in create only */ } else { + lod_set_pool(&lod_comp->llc_pool, NULL); objs = &lmm->lmm_objects[0]; } @@ -1623,8 +1629,8 @@ out: * \retval 0 if the striping is valid * \retval -EINVAL if striping is invalid */ -int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, - bool is_from_disk, __u64 start) +int lod_verify_striping(struct lod_device *d, struct lod_object *lo, + const struct lu_buf *buf, bool is_from_disk) { struct lov_user_md_v1 *lum; struct lov_comp_md_v1 *comp_v1; @@ -1655,8 +1661,8 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, struct lu_extent *ext; struct lov_desc *desc = &d->lod_desc; struct lu_buf tmp; + __u64 prev_end = 0; __u32 stripe_size = 0; - __u64 prev_end = start; comp_v1 = buf->lb_buf; if (buf->lb_len < le32_to_cpu(comp_v1->lcm_size)) { @@ -1670,6 +1676,14 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, RETURN(-EINVAL); } + if (S_ISREG(lod2lu_obj(lo)->lo_header->loh_attr) && + lo->ldo_comp_cnt > 0) { + __u32 cnt = lo->ldo_comp_cnt; + + ext = &lo->ldo_comp_entries[cnt - 1].llc_extent; + prev_end = ext->e_end; + } + for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) { ent = &comp_v1->lcm_entries[i]; ext = &ent->lcme_extent; @@ -1699,6 +1713,7 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, le64_to_cpu(ext->e_start), prev_end); RETURN(-EINVAL); } + prev_end = le64_to_cpu(ext->e_end); tmp.lb_buf = (char *)comp_v1 + diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 6fbedf0..ef65469 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -2017,7 +2017,7 @@ static int lod_dir_declare_xattr_set(const struct lu_env *env, if (rc != 0) RETURN(rc); } else if (strcmp(name, XATTR_NAME_LOV) == 0) { - rc = lod_verify_striping(d, buf, false, 0); + rc = lod_verify_striping(d, lo, 
buf, false); if (rc != 0) RETURN(rc); } @@ -2226,14 +2226,12 @@ static int lod_declare_layout_add(const struct lu_env *env, struct lov_user_md_v3 *v3; struct lov_comp_md_v1 *comp_v1 = buf->lb_buf; __u32 magic; - __u64 prev_end; int i, rc, array_cnt; ENTRY; LASSERT(lo->ldo_is_composite); - prev_end = lo->ldo_comp_entries[lo->ldo_comp_cnt - 1].llc_extent.e_end; - rc = lod_verify_striping(d, buf, false, prev_end); + rc = lod_verify_striping(d, lo, buf, false); if (rc != 0) RETURN(rc); @@ -2266,6 +2264,7 @@ static int lod_declare_layout_add(const struct lu_env *env, lod_comp->llc_extent.e_start = ext->e_start; lod_comp->llc_extent.e_end = ext->e_end; lod_comp->llc_stripe_offset = v1->lmm_stripe_offset; + lod_comp->llc_flags = comp_v1->lcm_entries[i].lcme_flags; lod_comp->llc_stripe_count = v1->lmm_stripe_count; if (!lod_comp->llc_stripe_count || @@ -2291,6 +2290,7 @@ static int lod_declare_layout_add(const struct lu_env *env, OBD_FREE(lo->ldo_comp_entries, sizeof(*lod_comp) * lo->ldo_comp_cnt); lo->ldo_comp_entries = comp_array; lo->ldo_comp_cnt = array_cnt; + /* No need to increase layout generation here, it will be increased * later when generating component ID for the new components */ @@ -2422,10 +2422,6 @@ static int lod_declare_layout_del(const struct lu_env *env, LASSERT(lo->ldo_is_composite); - rc = lod_verify_striping(d, buf, false, 0); - if (rc != 0) - RETURN(rc); - magic = comp_v1->lcm_magic; if (magic == __swab32(LOV_USER_MAGIC_COMP_V1)) { lustre_swab_lov_comp_md_v1(comp_v1); @@ -2592,6 +2588,139 @@ unlock: } /** + * Merge layouts to form a mirrored file. 
+ */ +static int lod_declare_layout_merge(const struct lu_env *env, + struct dt_object *dt, const struct lu_buf *mbuf, + struct thandle *th) +{ + struct lod_thread_info *info = lod_env_info(env); + struct lu_buf *buf = &info->lti_buf; + struct lod_object *lo = lod_dt_obj(dt); + struct lov_comp_md_v1 *lcm; + struct lov_comp_md_v1 *cur_lcm; + struct lov_comp_md_v1 *merge_lcm; + struct lov_comp_md_entry_v1 *lcme; + size_t size = 0; + size_t offset; + __u16 cur_entry_count; + __u16 merge_entry_count; + __u32 id = 0; + __u16 mirror_id = 0; + __u32 mirror_count; + int rc, i; + ENTRY; + + merge_lcm = mbuf->lb_buf; + if (mbuf->lb_len < sizeof(*merge_lcm)) + RETURN(-EINVAL); + + /* must be an existing layout from disk */ + if (le32_to_cpu(merge_lcm->lcm_magic) != LOV_MAGIC_COMP_V1) + RETURN(-EINVAL); + + merge_entry_count = le16_to_cpu(merge_lcm->lcm_entry_count); + + /* do not allow to merge two mirrored files */ + if (le16_to_cpu(merge_lcm->lcm_mirror_count)) + RETURN(-EBUSY); + + /* verify the target buffer */ + rc = lod_get_lov_ea(env, lo); + if (rc <= 0) + RETURN(rc ? 
: -ENODATA); + + cur_lcm = info->lti_ea_store; + if (le32_to_cpu(cur_lcm->lcm_magic) != LOV_MAGIC_COMP_V1) + RETURN(-EINVAL); + + cur_entry_count = le16_to_cpu(cur_lcm->lcm_entry_count); + + /* 'lcm_mirror_count + 1' is the current # of mirrors the file has */ + mirror_count = le16_to_cpu(cur_lcm->lcm_mirror_count) + 1; + if (mirror_count + 1 > LUSTRE_MIRROR_COUNT_MAX) + RETURN(-ERANGE); + + /* size of new layout */ + size = le32_to_cpu(cur_lcm->lcm_size) + + le32_to_cpu(merge_lcm->lcm_size) - sizeof(*cur_lcm); + + memset(buf, 0, sizeof(*buf)); + lu_buf_alloc(buf, size); + if (buf->lb_buf == NULL) + RETURN(-ENOMEM); + + lcm = buf->lb_buf; + memcpy(lcm, cur_lcm, sizeof(*lcm) + cur_entry_count * sizeof(*lcme)); + + offset = sizeof(*lcm) + + sizeof(*lcme) * (cur_entry_count + merge_entry_count); + for (i = 0; i < cur_entry_count; i++) { + struct lov_comp_md_entry_v1 *cur_lcme; + + lcme = &lcm->lcm_entries[i]; + cur_lcme = &cur_lcm->lcm_entries[i]; + + lcme->lcme_offset = cpu_to_le32(offset); + memcpy((char *)lcm + offset, + (char *)cur_lcm + le32_to_cpu(cur_lcme->lcme_offset), + le32_to_cpu(lcme->lcme_size)); + + offset += le32_to_cpu(lcme->lcme_size); + + if (mirror_count == 1) { + /* new mirrored file, create new mirror ID */ + id = pflr_id(1, i + 1); + lcme->lcme_id = cpu_to_le32(id); + } + + id = MAX(le32_to_cpu(lcme->lcme_id), id); + } + + mirror_id = mirror_id_of(id) + 1; + for (i = 0; i < merge_entry_count; i++) { + struct lov_comp_md_entry_v1 *merge_lcme; + + merge_lcme = &merge_lcm->lcm_entries[i]; + lcme = &lcm->lcm_entries[cur_entry_count + i]; + + *lcme = *merge_lcme; + lcme->lcme_offset = cpu_to_le32(offset); + + id = pflr_id(mirror_id, i + 1); + lcme->lcme_id = cpu_to_le32(id); + + memcpy((char *)lcm + offset, + (char *)merge_lcm + le32_to_cpu(merge_lcme->lcme_offset), + le32_to_cpu(lcme->lcme_size)); + + offset += le32_to_cpu(lcme->lcme_size); + } + + /* fixup layout information */ + lod_obj_inc_layout_gen(lo); + lcm->lcm_layout_gen = 
cpu_to_le32(lo->ldo_layout_gen); + lcm->lcm_size = cpu_to_le32(size); + lcm->lcm_entry_count = cpu_to_le16(cur_entry_count + merge_entry_count); + lcm->lcm_mirror_count = cpu_to_le16(mirror_count); + if ((le16_to_cpu(lcm->lcm_flags) & LCM_FL_FLR_MASK) == LCM_FL_NOT_FLR) + lcm->lcm_flags = cpu_to_le32(LCM_FL_RDONLY); + + LASSERT(dt_write_locked(env, dt_object_child(dt))); + lod_object_free_striping(env, lo); + rc = lod_parse_striping(env, lo, buf); + if (rc) + GOTO(out, rc); + + rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), buf, + XATTR_NAME_LOV, LU_XATTR_REPLACE, th); + +out: + lu_buf_free(buf); + RETURN(rc); +} + +/** * Implementation of dt_object_operations::do_declare_xattr_set. * * \see dt_object_operations::do_declare_xattr_set() in the API description @@ -2614,7 +2743,8 @@ static int lod_declare_xattr_set(const struct lu_env *env, ENTRY; mode = dt->do_lu.lo_header->loh_attr & S_IFMT; - if ((S_ISREG(mode) || mode == 0) && !(fl & LU_XATTR_REPLACE) && + if ((S_ISREG(mode) || mode == 0) && + !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE)) && (strcmp(name, XATTR_NAME_LOV) == 0 || strcmp(name, XATTR_LUSTRE_LOV) == 0)) { /* @@ -2636,6 +2766,10 @@ static int lod_declare_xattr_set(const struct lu_env *env, attr->la_mode = S_IFREG; } rc = lod_declare_striped_create(env, dt, attr, buf, th); + } else if (fl & LU_XATTR_MERGE) { + LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 || + strcmp(name, XATTR_LUSTRE_LOV) == 0); + rc = lod_declare_layout_merge(env, dt, buf, th); } else if (S_ISREG(mode) && strlen(name) > strlen(XATTR_LUSTRE_LOV) + 1 && strncmp(name, XATTR_LUSTRE_LOV, diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index fbb8111..01beb59 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -1847,7 +1847,7 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo, if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0) RETURN(0); - rc = lod_verify_striping(d, buf, false, 0); + rc = lod_verify_striping(d, lo, buf, false); if 
(rc) RETURN(-EINVAL); diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 893659c..3ee9763 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -441,6 +441,7 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size) lsm->lsm_magic = le32_to_cpu(lcm->lcm_magic); lsm->lsm_layout_gen = le32_to_cpu(lcm->lcm_layout_gen); lsm->lsm_entry_count = entry_count; + lsm->lsm_mirror_count = le16_to_cpu(lcm->lcm_mirror_count); lsm->lsm_flags = le16_to_cpu(lcm->lcm_flags); lsm->lsm_is_released = true; lsm->lsm_maxbytes = LLONG_MIN; diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 6142e7c..458049c 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -82,7 +82,8 @@ struct lov_stripe_md { u32 lsm_layout_gen; u16 lsm_flags; bool lsm_is_released; - u32 lsm_entry_count; + u16 lsm_mirror_count; + u16 lsm_entry_count; struct lov_stripe_md_entry *lsm_entries[]; }; diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index 3796522..efcb442 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -207,6 +207,7 @@ ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf, lcmv1->lcm_size = cpu_to_le32(lmm_size); lcmv1->lcm_layout_gen = cpu_to_le32(lsm->lsm_layout_gen); lcmv1->lcm_flags = cpu_to_le16(lsm->lsm_flags); + lcmv1->lcm_mirror_count = cpu_to_le16(lsm->lsm_mirror_count); lcmv1->lcm_entry_count = cpu_to_le16(lsm->lsm_entry_count); offset = sizeof(*lcmv1) + sizeof(*lcme) * lsm->lsm_entry_count; diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index b38c91c..e48de25 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -440,8 +440,7 @@ static void mdc_intent_close_pack(struct ptlrpc_request *req, struct ldlm_lock *lock; enum mds_op_bias bias = op_data->op_bias; - if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | - MDS_RENAME_MIGRATE))) + if (!(bias & (MDS_CLOSE_INTENT | MDS_RENAME_MIGRATE))) return; data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA); 
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 1641161..d0dfe2d 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -776,7 +776,8 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, /* save the errcode and proceed to close */ saved_rc = rc; } - } else if (op_data->op_bias & MDS_CLOSE_LAYOUT_SWAP) { + } else if (op_data->op_bias & (MDS_CLOSE_LAYOUT_SWAP | + MDS_CLOSE_LAYOUT_MERGE)) { req_fmt = &RQF_MDS_INTENT_CLOSE; } else { req_fmt = &RQF_MDS_CLOSE; diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 0fa360d..d16f363 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -1071,9 +1071,126 @@ free: return rc; } +static int mdd_declare_xattr_del(const struct lu_env *env, + struct mdd_device *mdd, + struct mdd_object *obj, + const char *name, + struct thandle *handle); + static int mdd_xattr_del(const struct lu_env *env, struct md_object *obj, const char *name); +static int mdd_xattr_merge(const struct lu_env *env, struct md_object *md_obj, + struct md_object *md_vic) +{ + struct mdd_device *mdd = mdo2mdd(md_obj); + struct mdd_object *obj = md2mdd_obj(md_obj); + struct mdd_object *vic = md2mdd_obj(md_vic); + struct lu_buf *buf = &mdd_env_info(env)->mti_buf[0]; + struct lu_buf *buf_vic = &mdd_env_info(env)->mti_buf[1]; + struct lov_mds_md *lmm; + struct thandle *handle; + int rc; + ENTRY; + + rc = lu_fid_cmp(mdo2fid(obj), mdo2fid(vic)); + if (rc == 0) /* same fid */ + RETURN(-EPERM); + + handle = mdd_trans_create(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + if (rc > 0) { + mdd_write_lock(env, obj, MOR_TGT_CHILD); + mdd_write_lock(env, vic, MOR_TGT_CHILD); + } else { + mdd_write_lock(env, vic, MOR_TGT_CHILD); + mdd_write_lock(env, obj, MOR_TGT_CHILD); + } + + /* get EA of victim file */ + memset(buf_vic, 0, sizeof(*buf_vic)); + rc = mdd_get_lov_ea(env, vic, buf_vic); + if (rc < 0) { + if (rc == -ENODATA) + rc = 0; + GOTO(out, rc); + } + + /* parse the 
layout of victim file */ + lmm = buf_vic->lb_buf; + if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_COMP_V1) + GOTO(out, rc = -EINVAL); + + /* save EA of target file for restore */ + memset(buf, 0, sizeof(*buf)); + rc = mdd_get_lov_ea(env, obj, buf); + if (rc < 0) + GOTO(out, rc); + + /* Get rid of the layout from victim object */ + rc = mdd_declare_xattr_del(env, mdd, vic, XATTR_NAME_LOV, handle); + if (rc) + GOTO(out, rc); + + rc = mdd_declare_xattr_set(env, mdd, obj, buf_vic, XATTR_LUSTRE_LOV, + LU_XATTR_MERGE, handle); + if (rc) + GOTO(out, rc); + + rc = mdd_trans_start(env, mdd, handle); + if (rc != 0) + GOTO(out, rc); + + rc = mdo_xattr_set(env, obj, buf_vic, XATTR_LUSTRE_LOV, LU_XATTR_MERGE, + handle); + if (rc) + GOTO(out, rc); + + rc = mdo_xattr_del(env, vic, XATTR_NAME_LOV, handle); + if (rc) { /* wtf? */ + int rc2; + + rc2 = mdo_xattr_set(env, obj, buf, XATTR_NAME_LOV, + LU_XATTR_REPLACE, handle); + if (rc2) + CERROR("%s: failed to rollback of layout of: "DFID + ": %d, file state unknown\n", + mdd_obj_dev_name(obj), PFID(mdo2fid(obj)), rc2); + GOTO(out, rc); + } + + (void)mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, obj, handle); + (void)mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, vic, handle); + EXIT; + +out: + mdd_trans_stop(env, mdd, rc, handle); + mdd_write_unlock(env, obj); + mdd_write_unlock(env, vic); + lu_buf_free(buf); + lu_buf_free(buf_vic); + + return rc; +} + +static int mdd_layout_merge_allowed(const struct lu_env *env, + struct md_object *target, + struct md_object *victim) +{ + struct mdd_object *o1 = md2mdd_obj(target); + + /* cannot extend directory's LOVEA */ + if (S_ISDIR(mdd_object_type(o1))) { + CERROR("%s: Don't extend directory's LOVEA, just set it.\n", + mdd_obj_dev_name(o1)); + RETURN(-EISDIR); + } + + RETURN(0); +} + /** * The caller should guarantee to update the object ctime * after xattr_set if needed. 
@@ -1099,6 +1216,21 @@ static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj, if (rc) RETURN(rc); + if (strcmp(name, XATTR_LUSTRE_LOV) == 0 && fl == LU_XATTR_MERGE) { + struct md_object *victim = buf->lb_buf; + + if (buf->lb_len != sizeof(victim)) + RETURN(-EINVAL); + + rc = mdd_layout_merge_allowed(env, obj, victim); + if (rc) + RETURN(rc); + + /* merge layout of victim as a mirror of obj's. */ + rc = mdd_xattr_merge(env, obj, victim); + RETURN(rc); + } + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0 || strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) { struct posix_acl *acl; diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 3f3a7dc..344160b 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -1058,7 +1058,10 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info) ma->ma_attr_flags |= MDS_CLOSE_LAYOUT_SWAP; else ma->ma_attr_flags &= ~MDS_CLOSE_LAYOUT_SWAP; - + if (rec->sa_bias & MDS_CLOSE_LAYOUT_MERGE) + ma->ma_attr_flags |= MDS_CLOSE_LAYOUT_MERGE; + else + ma->ma_attr_flags &= ~MDS_CLOSE_LAYOUT_MERGE; RETURN(0); } @@ -1137,7 +1140,7 @@ static int mdt_intent_close_unpack(struct mdt_thread_info *info) struct req_capsule *pill = info->mti_pill; ENTRY; - if (!(ma->ma_attr_flags & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP))) + if (!(ma->ma_attr_flags & MDS_CLOSE_INTENT)) RETURN(0); req_capsule_extend(pill, &RQF_MDS_INTENT_CLOSE); diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 2b6ee7d..197b852 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -1911,8 +1911,8 @@ out_reprocess: return rc; } -int mdt_close_swap_layouts(struct mdt_thread_info *info, - struct mdt_object *o, struct md_attr *ma) +int mdt_close_handle_layouts(struct mdt_thread_info *info, + struct mdt_object *o, struct md_attr *ma) { struct mdt_lock_handle *lh1 = &info->mti_lh[MDT_LH_NEW]; struct mdt_lock_handle *lh2 = &info->mti_lh[MDT_LH_OLD]; @@ -2005,8 +2005,17 @@ int mdt_close_swap_layouts(struct mdt_thread_info *info, 
GOTO(out_unlock1, rc); /* Swap layout with orphan object */ - rc = mo_swap_layouts(info->mti_env, mdt_object_child(o1), - mdt_object_child(o2), 0); + if (ma->ma_attr_flags & MDS_CLOSE_LAYOUT_SWAP) { + rc = mo_swap_layouts(info->mti_env, mdt_object_child(o1), + mdt_object_child(o2), 0); + } else if (ma->ma_attr_flags & MDS_CLOSE_LAYOUT_MERGE) { + struct lu_buf *buf = &info->mti_buf; + + buf->lb_len = sizeof(void *); + buf->lb_buf = mdt_object_child(o == o1 ? o2 : o1); + rc = mo_xattr_set(info->mti_env, mdt_object_child(o), buf, + XATTR_LUSTRE_LOV, LU_XATTR_MERGE); + } if (rc < 0) GOTO(out_unlock2, rc); @@ -2060,11 +2069,14 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd) struct md_attr *ma = &info->mti_attr; int rc = 0; __u64 mode; + __u64 intent; ENTRY; mode = mfd->mfd_mode; - if (ma->ma_attr_flags & MDS_HSM_RELEASE) { + intent = ma->ma_attr_flags & MDS_CLOSE_INTENT; + switch (intent) { + case MDS_HSM_RELEASE: { rc = mdt_hsm_release(info, o, ma); if (rc < 0) { CDEBUG(D_HSM, "%s: File " DFID " release failed: %d\n", @@ -2072,10 +2084,11 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd) PFID(mdt_object_fid(o)), rc); /* continue to close even error occurred. */ } + break; } - - if (ma->ma_attr_flags & MDS_CLOSE_LAYOUT_SWAP) { - rc = mdt_close_swap_layouts(info, o, ma); + case MDS_CLOSE_LAYOUT_MERGE: + case MDS_CLOSE_LAYOUT_SWAP: { + rc = mdt_close_handle_layouts(info, o, ma); if (rc < 0) { CDEBUG(D_INODE, "%s: cannot swap layout of "DFID": rc=%d\n", @@ -2083,6 +2096,11 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd) PFID(mdt_object_fid(o)), rc); /* continue to close even if error occurred. 
*/ } + break; + } + default: + /* nothing */ + break; } if (mode & FMODE_WRITE) diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 74262a5..7c80969 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2185,6 +2185,7 @@ void lustre_print_user_md(unsigned int lvl, struct lov_user_md *lum, CDEBUG(lvl, "\tlcm_layout_gen: %#x\n", comp_v1->lcm_layout_gen); CDEBUG(lvl, "\tlcm_flags: %#x\n", comp_v1->lcm_flags); CDEBUG(lvl, "\tlcm_entry_count: %#x\n\n", comp_v1->lcm_entry_count); + CDEBUG(lvl, "\tlcm_mirror_count: %#x\n\n", comp_v1->lcm_mirror_count); for (i = 0; i < comp_v1->lcm_entry_count; i++) { struct lov_comp_md_entry_v1 *ent = &comp_v1->lcm_entries[i]; @@ -2266,6 +2267,7 @@ void lustre_swab_lov_comp_md_v1(struct lov_comp_md_v1 *lum) __swab32s(&lum->lcm_layout_gen); __swab16s(&lum->lcm_flags); __swab16s(&lum->lcm_entry_count); + __swab16s(&lum->lcm_mirror_count); CLASSERT(offsetof(typeof(*lum), lcm_padding1) != 0); CLASSERT(offsetof(typeof(*lum), lcm_padding2) != 0); diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 9240f37..cdb23de 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1705,6 +1705,8 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding)); LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n", (unsigned)LCME_FL_INIT); + LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n", + (unsigned)LCME_FL_NEG); /* Checks for struct lov_comp_md_v1 */ LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n", @@ -1729,9 +1731,13 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count)); LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count)); - LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 16, "found %lld\n", + 
LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_mirror_count) == 16, "found %lld\n", + (long long)(int)offsetof(struct lov_comp_md_v1, lcm_mirror_count)); + LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count)); + LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 18, "found %lld\n", (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1)); - LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 8, "found %lld\n", + LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 6, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1)); LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n", (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2)); @@ -1742,6 +1748,14 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0])); CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0)); + LASSERTF(LCM_FL_NOT_FLR == 0, "found %lld\n", + (long long)LCM_FL_NOT_FLR); + LASSERTF(LCM_FL_RDONLY == 1, "found %lld\n", + (long long)LCM_FL_RDONLY); + LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n", + (long long)LCM_FL_WRITE_PENDING); + LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n", + (long long)LCM_FL_SYNC_PENDING); /* Checks for struct lmv_mds_md_v1 */ LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n", diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 68dabfb..9832547 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -38,7 +38,7 @@ noinst_SCRIPTS += setup-cifs.sh parallel-scale-cifs.sh noinst_SCRIPTS += posix.sh sanity-scrub.sh scrub-performance.sh ha.sh noinst_SCRIPTS += sanity-lfsck.sh lfsck-performance.sh noinst_SCRIPTS += resolveip -noinst_SCRIPTS += sanity-hsm.sh 
sanity-lsnapshot.sh sanity-pfl.sh +noinst_SCRIPTS += sanity-hsm.sh sanity-lsnapshot.sh sanity-pfl.sh sanity-flr.sh noinst_SCRIPTS += sanity-dom.sh dom-performance.sh nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += test-groups/regression test-groups/regression-mpi diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh new file mode 100644 index 0000000..94767b9 --- /dev/null +++ b/lustre/tests/sanity-flr.sh @@ -0,0 +1,157 @@ +#!/bin/bash +# +# Run select tests by setting ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# +# Run test by setting NOSETUP=true when ltest has setup env for us +set -e +set +o posix + +SRCDIR=$(dirname $0) +export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin + +ONLY=${ONLY:-"$*"} +# Bug number for skipped test: +ALWAYS_EXCEPT="$SANITY_FLR_EXCEPT" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +[ "$ALWAYS_EXCEPT$EXCEPT" ] && + echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" + +TMP=${TMP:-/tmp} +CHECKSTAT=${CHECKSTAT:-"checkstat -v"} +LFS=${LFS:-lfs} +LCTL=${LCTL:-lctl} +MULTIOP=${MULTIOP:-multiop} + +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging + +check_and_setup_lustre +DIR=${DIR:-$MOUNT} +assert_DIR + +if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ]]; then + skip_env "Need MDS version at least 2.7.64" && exit +fi + +build_test_filter + +[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && + error "\$RUNAS_ID set to 0, but \$UID is also 0!" 
+check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS + +# global array to store mirror IDs +declare -a mirror_array +get_mirror_ids() { + local tf=$1 + local id + local array + + array=() + for id in $($LFS getstripe $tf | awk '/lcme_id/{print $2}'); do + array[${#array[@]}]=$((id >> 16)) + done + + mirror_array=($(printf "%s\n" "${array[@]}" | sort -u)) + + echo ${#mirror_array[@]} +} + +# command line test cases +test_1() { + local tf=$DIR/$tfile + local mirror_count=16 # LUSTRE_MIRROR_COUNT_MAX + + $LFS setstripe -E EOF -c -1 $tf + + local stripes[0]=$OSTCOUNT + + for ((i = 1; i < $mirror_count; i++)); do + # add mirrors with different stripes to the file + stripes[$i]=$((RANDOM % OSTCOUNT)) + [ ${stripes[$i]} -eq 0 ] && stripes[$i]=1 + + $LFS setstripe --component-add --mirror -c ${stripes[$i]} $tf + done + + [ $(get_mirror_ids $tf) -ne $mirror_count ] && + error "mirror count error" + + # can't create mirrors exceeding LUSTRE_MIRROR_COUNT_MAX + $LFS setstripe --component-add --mirror $tf && + error "Creating the $((mirror_count+1))th mirror succeeded" + + local ids=($($LFS getstripe $tf | awk '/lcme_id/{print $2}' | + tr '\n' ' ')) + + # verify the range of components and stripe counts + for ((i = 0; i < $mirror_count; i++)); do + local sc=$($LFS getstripe -I${ids[$i]} -c $tf) + local start=$($LFS getstripe -I${ids[$i]} --component-start $tf) + local end=$($LFS getstripe -I${ids[$i]} --component-end $tf) + + [[ ${stripes[$i]} = $sc ]] || { + $LFS getstripe -v $tf; + error "$i: sc error: id: ${ids[$i]}, ${stripes[$i]}"; + } + [ $start -eq 0 ] || { + $LFS getstripe -v $tf; + error "$i: start error id: ${ids[$i]}"; + } + [ $end = "EOF" ] || { + $LFS getstripe -v $tf; + error "$i: end error id: ${ids[$i]}"; + } + done +} +run_test 1 "create components with setstripe options" + +test_2() { + local tf=$DIR/$tfile + local tf2=$DIR/$tfile-2 + + $LFS setstripe -E 1M -E EOF -c 1 $tf + $LFS setstripe -E 2M -E EOF -c -1 $tf2 + + local layout=$($LFS getstripe $tf2 | grep -A 4 
lmm_objects) + + $LFS setstripe --component-add --mirror=$tf2 $tf + + [ $(get_mirror_ids $tf) -ne 2 ] && error "mirror count should be 2" + $LFS getstripe $tf2 | grep -q 'no stripe info' || + error "$tf2 still has stripe info" +} +run_test 2 "create components from existing files" + +test_3() { + [[ $MDSCOUNT -lt 2 ]] && skip "need >= 2 MDTs" && return + + for ((i = 0; i < 2; i++)); do + $LFS mkdir -i $i $DIR/$tdir-$i + $LFS setstripe -E -1 $DIR/$tdir-$i/$tfile + done + + $LFS setstripe --component-add --mirror=$DIR/$tdir-1/$tfile \ + $DIR/$tdir-0/$tfile || error "creating mirrors" + + # the MDT doesn't support cancelling layout locks for remote objects, so do + # it here manually. + cancel_lru_locks mdc + + # make sure the mirrored file was created successfully + [[ $($LFS getstripe --component-count $DIR/$tdir-0/$tfile) -eq 2 ]] || + { $LFS getstripe $DIR/$tdir-0/$tfile; + error "expected 2 components"; } + + # cleanup + rm -rf $DIR/$tdir-* +} +run_test 3 "create components from files located on different MDTs" + +complete $SECONDS +check_and_cleanup_lustre +exit_status diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 026e36e..32c403b 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -166,7 +166,6 @@ static int lfs_list_commands(int argc, char **argv); "\tmode: the mode of the directory\n" static const char *progname; -static bool file_lease_supported = true; /* all available commands */ command_t cmdlist[] = { @@ -411,8 +410,6 @@ command_t cmdlist[] = { }; -#define MIGRATION_NONBLOCK 1 - static int check_hashtype(const char *hashtype) { int i; @@ -424,47 +421,148 @@ static int check_hashtype(const char *hashtype) return 0; } -/** - * Internal helper for migrate_copy_data(). Check lease and report error if - * need be. - * - * \param[in] fd File descriptor on which to check the lease. - * \param[out] lease_broken Set to true if the lease was broken. - * \param[in] group_locked Whether a group lock was taken or not.
- * \param[in] path Name of the file being processed, for error - * reporting - * - * \retval 0 Migration can keep on going. - * \retval -errno Error occurred, abort migration. - */ -static int check_lease(int fd, bool *lease_broken, bool group_locked, - const char *path) + +static const char *error_loc = "syserror"; + +enum { + MIGRATION_NONBLOCK = 1 << 0, + MIGRATION_MIRROR = 1 << 1, +}; + +static int lfs_component_create(char *fname, int open_flags, mode_t open_mode, + struct llapi_layout *layout); + +static int +migrate_open_files(const char *name, const struct llapi_stripe_param *param, + struct llapi_layout *layout, int *fd_src, int *fd_tgt) { - int rc; + int fd = -1; + int fdv = -1; + int mdt_index; + int random_value; + char parent[PATH_MAX]; + char volatile_file[PATH_MAX]; + char *ptr; + int rc; + struct stat st; + struct stat stv; - if (!file_lease_supported) - return 0; + if (param == NULL && layout == NULL) { + error_loc = "layout information"; + return -EINVAL; + } - rc = llapi_lease_check(fd); - if (rc > 0) - return 0; /* llapi_check_lease returns > 0 on success. */ + /* search for file directory pathname */ + if (strlen(name) > sizeof(parent) - 1) { + error_loc = "source file name"; + return -ERANGE; + } - if (!group_locked) { - fprintf(stderr, "%s: cannot migrate '%s': file busy\n", - progname, path); - rc = rc ? 
rc : -EAGAIN; + strncpy(parent, name, sizeof(parent)); + ptr = strrchr(parent, '/'); + if (ptr == NULL) { + if (getcwd(parent, sizeof(parent)) == NULL) { + error_loc = "getcwd"; + return -errno; + } } else { - fprintf(stderr, "%s: external attempt to access file '%s' " - "blocked until migration ends.\n", progname, path); - rc = 0; + if (ptr == parent) /* leading '/' */ + ptr = parent + 1; + *ptr = '\0'; + } + + /* open file, direct io */ + /* even if the file is only read, WR mode is needed to allow + * layout swap on fd */ + fd = open(name, O_RDWR | O_DIRECT); + if (fd < 0) { + rc = -errno; + error_loc = "cannot open source file"; + return rc; + } + + rc = llapi_file_fget_mdtidx(fd, &mdt_index); + if (rc < 0) { + error_loc = "cannot get MDT index"; + goto out; + } + + do { + int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW; + mode_t open_mode = S_IRUSR | S_IWUSR; + + random_value = random(); + rc = snprintf(volatile_file, sizeof(volatile_file), + "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR, + mdt_index, random_value); + if (rc >= sizeof(volatile_file)) { + rc = -ENAMETOOLONG; + break; + } + + /* create, open a volatile file, use caching (ie no directio) */ + if (param != NULL) + fdv = llapi_file_open_param(volatile_file, open_flags, + open_mode, param); + else + fdv = lfs_component_create(volatile_file, open_flags, + open_mode, layout); + } while (fdv < 0 && (rc = fdv) == -EEXIST); + + if (rc < 0) { + error_loc = "cannot create volatile file"; + goto out; + } + + /* In case the MDT does not support creation of volatile files + * we should try to unlink it. */ + (void)unlink(volatile_file); + + /* Not-owner (root?) special case. + * Need to set owner/group of volatile file like original. + * This will allow to pass related check during layout_swap.
+ */ + rc = fstat(fd, &st); + if (rc != 0) { + rc = -errno; + error_loc = "cannot stat source file"; + goto out; + } + + rc = fstat(fdv, &stv); + if (rc != 0) { + rc = -errno; + error_loc = "cannot stat volatile"; + goto out; + } + + if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) { + rc = fchown(fdv, st.st_uid, st.st_gid); + if (rc != 0) { + rc = -errno; + error_loc = "cannot change ownwership of volatile"; + goto out; + } + } + +out: + if (rc < 0) { + if (fd > 0) + close(fd); + if (fdv > 0) + close(fdv); + } else { + *fd_src = fd; + *fd_tgt = fdv; + error_loc = NULL; } - *lease_broken = true; return rc; } -static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size, - bool group_locked, const char *fname) +static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int)) { + struct llapi_layout *layout; + size_t buf_size = 4 * 1024 * 1024; void *buf = NULL; ssize_t rsize = -1; ssize_t wsize = 0; @@ -472,7 +570,17 @@ static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size, size_t wpos = 0; off_t bufoff = 0; int rc; - bool lease_broken = false; + + layout = llapi_layout_get_by_fd(fd_src, 0); + if (layout != NULL) { + uint64_t stripe_size; + + rc = llapi_layout_stripe_size_get(layout, &stripe_size); + if (rc == 0) + buf_size = stripe_size; + + llapi_layout_free(layout); + } /* Use a page-aligned buffer for direct I/O */ rc = posix_memalign(&buf, getpagesize(), buf_size); @@ -483,18 +591,16 @@ static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size, /* read new data only if we have written all * previously read data */ if (wpos == rpos) { - if (!lease_broken) { - rc = check_lease(fd_src, &lease_broken, - group_locked, fname); + if (check_file) { + rc = check_file(fd_src); if (rc < 0) - goto out; + break; } + rsize = read(fd_src, buf, buf_size); if (rsize < 0) { rc = -errno; - fprintf(stderr, "%s: %s: read failed: %s\n", - progname, fname, strerror(-rc)); - goto out; + break; } rpos += rsize; bufoff = 0; @@ -506,39 
+612,39 @@ static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size, wsize = write(fd_dst, buf + bufoff, rpos - wpos); if (wsize < 0) { rc = -errno; - fprintf(stderr, - "%s: %s: write failed on volatile: %s\n", - progname, fname, strerror(-rc)); - goto out; + break; } wpos += wsize; bufoff += wsize; } - rc = fsync(fd_dst); - if (rc < 0) { - rc = -errno; - fprintf(stderr, "%s: %s: fsync failed: %s\n", - progname, fname, strerror(-rc)); + if (rc == 0) { + rc = fsync(fd_dst); + if (rc < 0) + rc = -errno; } -out: free(buf); return rc; } -static int migrate_copy_timestamps(int fdv, const struct stat *st) +static int migrate_copy_timestamps(int fd, int fdv) { - struct timeval tv[2] = { - {.tv_sec = st->st_atime}, - {.tv_sec = st->st_mtime} - }; + struct stat st; + + if (fstat(fd, &st) == 0) { + struct timeval tv[2] = { + {.tv_sec = st.st_atime}, + {.tv_sec = st.st_mtime} + }; - return futimes(fdv, tv); + return futimes(fdv, tv); + } + + return -errno; } -static int migrate_block(int fd, int fdv, const struct stat *st, - size_t buf_size, const char *name) +static int migrate_block(int fd, int fdv) { __u64 dv1; int gid; @@ -547,8 +653,7 @@ static int migrate_block(int fd, int fdv, const struct stat *st, rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH); if (rc < 0) { - fprintf(stderr, "%s: %s: cannot get dataversion: %s\n", - progname, name, strerror(-rc)); + error_loc = "cannot get dataversion"; return rc; } @@ -561,22 +666,20 @@ static int migrate_block(int fd, int fdv, const struct stat *st, * block it too. 
*/ rc = llapi_group_lock(fd, gid); if (rc < 0) { - fprintf(stderr, "%s: %s: cannot get group lock: %s\n", - progname, name, strerror(-rc)); + error_loc = "cannot get group lock"; return rc; } - rc = migrate_copy_data(fd, fdv, buf_size, true, name); + rc = migrate_copy_data(fd, fdv, NULL); if (rc < 0) { - fprintf(stderr, "%s: %s: data copy failed\n", progname, name); + error_loc = "data copy failed"; goto out_unlock; } /* Make sure we keep original atime/mtime values */ - rc = migrate_copy_timestamps(fdv, st); + rc = migrate_copy_timestamps(fd, fdv); if (rc < 0) { - fprintf(stderr, "%s: %s: timestamp copy failed\n", - progname, name); + error_loc = "timestamp copy failed"; goto out_unlock; } @@ -588,28 +691,44 @@ static int migrate_block(int fd, int fdv, const struct stat *st, rc = llapi_fswap_layouts_grouplock(fd, fdv, dv1, 0, 0, SWAP_LAYOUTS_CHECK_DV1); if (rc == -EAGAIN) { - fprintf(stderr, "%s: %s: dataversion changed during copy, " - "migration aborted\n", progname, name); + error_loc = "file changed"; goto out_unlock; } else if (rc < 0) { - fprintf(stderr, "%s: %s: cannot swap layouts: %s\n", progname, - name, strerror(-rc)); + error_loc = "cannot swap layout"; goto out_unlock; } out_unlock: rc2 = llapi_group_unlock(fd, gid); if (rc2 < 0 && rc == 0) { - fprintf(stderr, "%s: %s: putting group lock failed: %s\n", - progname, name, strerror(-rc2)); + error_loc = "unlock group lock"; rc = rc2; } return rc; } -static int migrate_nonblock(int fd, int fdv, const struct stat *st, - size_t buf_size, const char *name) +/** + * Internal helper for migrate_copy_data(). Check lease and report error if + * need be. + * + * \param[in] fd File descriptor on which to check the lease. + * + * \retval 0 Migration can keep on going. + * \retval -errno Error occurred, abort migration. + */ +static int check_lease(int fd) +{ + int rc; + + rc = llapi_lease_check(fd); + if (rc > 0) + return 0; /* llapi_check_lease returns > 0 on success. 
*/ + + return -EBUSY; +} + +static int migrate_nonblock(int fd, int fdv) { __u64 dv1; __u64 dv2; @@ -617,47 +736,32 @@ static int migrate_nonblock(int fd, int fdv, const struct stat *st, rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH); if (rc < 0) { - fprintf(stderr, "%s: %s: cannot get data version: %s\n", - progname, name, strerror(-rc)); + error_loc = "cannot get data version"; return rc; } - rc = migrate_copy_data(fd, fdv, buf_size, false, name); + rc = migrate_copy_data(fd, fdv, check_lease); if (rc < 0) { - fprintf(stderr, "%s: %s: data copy failed\n", progname, name); + error_loc = "data copy failed"; return rc; } rc = llapi_get_data_version(fd, &dv2, LL_DV_RD_FLUSH); if (rc != 0) { - fprintf(stderr, "%s: %s: cannot get data version: %s\n", - progname, name, strerror(-rc)); + error_loc = "cannot get data version"; return rc; } if (dv1 != dv2) { rc = -EAGAIN; - fprintf(stderr, "%s: %s: data version changed during " - "migration\n", - progname, name); + error_loc = "source file changed"; return rc; } /* Make sure we keep original atime/mtime values */ - rc = migrate_copy_timestamps(fdv, st); + rc = migrate_copy_timestamps(fd, fdv); if (rc < 0) { - fprintf(stderr, "%s: %s: timestamp copy failed\n", - progname, name); - return rc; - } - - /* Atomically put lease, swap layouts and close. - * for a migration we need to check data version on file did - * not change. 
*/ - rc = llapi_fswap_layouts(fd, fdv, 0, 0, SWAP_LAYOUTS_CLOSE); - if (rc < 0) { - fprintf(stderr, "%s: %s: cannot swap layouts: %s\n", - progname, name, strerror(-rc)); + error_loc = "timestamp copy failed"; return rc; } @@ -735,190 +839,147 @@ static int lfs_migrate(char *name, __u64 migration_flags, struct llapi_stripe_param *param, struct llapi_layout *layout) { - int fd = -1; - int fdv = -1; - char parent[PATH_MAX]; - int mdt_index; - int random_value; - char volatile_file[sizeof(parent) + - LUSTRE_VOLATILE_HDR_LEN + - 2 * sizeof(mdt_index) + - 2 * sizeof(random_value) + 4]; - char *ptr; - int rc; - struct lov_user_md *lum = NULL; - int lum_size; - int buf_size = 1024 * 1024 * 4; - bool have_lease_rdlck = false; - struct stat st; - struct stat stv; + int fd = -1; + int fdv = -1; + int rc; - /* find the right size for the IO and allocate the buffer */ - lum_size = lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3); - lum = malloc(lum_size); - if (lum == NULL) { - rc = -ENOMEM; - goto free; - } + rc = migrate_open_files(name, param, layout, &fd, &fdv); + if (rc < 0) + goto out; - rc = llapi_file_get_stripe(name, lum); - /* failure can happen for many reasons and some may be not real errors - * (eg: no stripe) - * in case of a real error, a later call will fail with better - * error management */ - if (rc == 0) { - if ((lum->lmm_magic == LOV_USER_MAGIC_V1 || - lum->lmm_magic == LOV_USER_MAGIC_V3) && - lum->lmm_stripe_size != 0) - buf_size = lum->lmm_stripe_size; + if (!(migration_flags & MIGRATION_NONBLOCK)) { + /* Blocking mode (forced if servers do not support file lease). 
+ * It is also the default mode, since we cannot distinguish + * between a broken lease and a server that does not support + * atomic swap/close (LU-6785) */ + rc = migrate_block(fd, fdv); + goto out; } - /* open file, direct io */ - /* even if the file is only read, WR mode is nedeed to allow - * layout swap on fd */ - fd = open(name, O_RDWR | O_DIRECT); - if (fd == -1) { - rc = -errno; - fprintf(stderr, "%s: cannot open '%s': %s\n", progname, name, - strerror(-rc)); - goto free; - } - - if (file_lease_supported) { - rc = llapi_lease_get(fd, LL_LEASE_RDLCK); - if (rc == -EOPNOTSUPP) { - /* Older servers do not support file lease. - * Disable related checks. This opens race conditions - * as explained in LU-4840 */ - file_lease_supported = false; - } else if (rc < 0) { - fprintf(stderr, "%s: %s: cannot get open lease: %s\n", - progname, name, strerror(-rc)); - goto error; - } else { - have_lease_rdlck = true; - } + rc = llapi_lease_get(fd, LL_LEASE_RDLCK); + if (rc < 0) { + error_loc = "cannot get lease"; + goto out; } - /* search for file directory pathname */ - if (strlen(name) > sizeof(parent)-1) { - rc = -E2BIG; - goto error; - } - strncpy(parent, name, sizeof(parent)); - ptr = strrchr(parent, '/'); - if (ptr == NULL) { - if (getcwd(parent, sizeof(parent)) == NULL) { - rc = -errno; - goto error; - } - } else { - if (ptr == parent) - strcpy(parent, "/"); - else - *ptr = '\0'; + rc = migrate_nonblock(fd, fdv); + if (rc < 0) { + llapi_lease_put(fd); + goto out; } - rc = llapi_file_fget_mdtidx(fd, &mdt_index); + /* Atomically put lease, swap layouts and close. + * for a migration we need to check data version on file did + * not change. */ + rc = llapi_fswap_layouts(fd, fdv, 0, 0, + migration_flags & MIGRATION_MIRROR ? 
+ MERGE_LAYOUTS_CLOSE : SWAP_LAYOUTS_CLOSE); if (rc < 0) { - fprintf(stderr, "%s: %s: cannot get MDT index: %s\n", - progname, name, strerror(-rc)); - goto error; + error_loc = "cannot swap layout"; + goto out; } - do { - int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW; - mode_t open_mode = S_IRUSR | S_IWUSR; +out: + if (fd >= 0) + close(fd); - random_value = random(); - rc = snprintf(volatile_file, sizeof(volatile_file), - "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR, - mdt_index, random_value); - if (rc >= sizeof(volatile_file)) { - rc = -E2BIG; - goto error; - } + if (fdv >= 0) + close(fdv); - /* create, open a volatile file, use caching (ie no directio) */ - if (param != NULL) - fdv = llapi_file_open_param(volatile_file, open_flags, - open_mode, param); - else if (layout != NULL) - fdv = lfs_component_create(volatile_file, open_flags, - open_mode, layout); - else - fdv = -EINVAL; - } while (fdv == -EEXIST); + if (rc < 0) + fprintf(stderr, "error: %s: %s: %s: %s\n", + progname, name, error_loc, strerror(-rc)); + return rc; +} - if (fdv < 0) { - rc = fdv; - fprintf(stderr, "%s: %s: cannot create volatile file in" - " directory: %s\n", - progname, parent, strerror(-rc)); - goto error; +static int lfs_create_mirror(char *fname, struct llapi_layout *layout, + const char *mirror_file) +{ + int fd = -1; + int fdv = -1; + struct stat stbuf; + struct stat stbuf_v; + __u64 dv; + int rc; + + if (mirror_file == NULL) + return lfs_migrate(fname, MIGRATION_NONBLOCK | MIGRATION_MIRROR, + NULL, layout); + + fd = open(fname, O_RDWR); + if (fd < 0) { + error_loc = "open source file"; + rc = -errno; + goto out; } - /* In case the MDT does not support creation of volatile files - * we should try to unlink it. */ - (void)unlink(volatile_file); + /* Get rid of caching pages from clients */ + rc = llapi_get_data_version(fd, &dv, LL_DV_WR_FLUSH); + if (rc < 0) { + error_loc = "cannot get data version"; + return rc; + } - /* Not-owner (root?) special case. 
- * Need to set owner/group of volatile file like original. - * This will allow to pass related check during layout_swap. - */ - rc = fstat(fd, &st); - if (rc != 0) { + fdv = open(mirror_file, O_WRONLY); + if (fdv < 0) { + error_loc = "open target file"; rc = -errno; - fprintf(stderr, "%s: %s: cannot stat: %s\n", progname, name, - strerror(errno)); - goto error; + goto out; } - rc = fstat(fdv, &stv); - if (rc != 0) { + + rc = llapi_get_data_version(fdv, &dv, LL_DV_WR_FLUSH); + if (rc < 0) { + error_loc = "cannot get data version"; + return rc; + } + + if (fstat(fd, &stbuf) || fstat(fdv, &stbuf_v)) { + error_loc = "stat source or target file"; rc = -errno; - fprintf(stderr, "%s: %s: cannot stat: %s\n", progname, - volatile_file, strerror(errno)); - goto error; + goto out; } - if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) { - rc = fchown(fdv, st.st_uid, st.st_gid); - if (rc != 0) { - rc = -errno; - fprintf(stderr, "%s: %s: cannot chown: %s\n", progname, - name, strerror(errno)); - goto error; - } + + if (stbuf.st_dev != stbuf_v.st_dev) { + error_loc = "stat source and target file"; + rc = EXDEV; + goto out; } - if (migration_flags & MIGRATION_NONBLOCK && file_lease_supported) { - rc = migrate_nonblock(fd, fdv, &st, buf_size, name); - if (rc == 0) { - have_lease_rdlck = false; - fdv = -1; /* The volatile file is closed as we put the - * lease in non-blocking mode. */ - } - } else { - /* Blocking mode (forced if servers do not support file lease). 
- * It is also the default mode, since we cannot distinguish - * between a broken lease and a server that does not support - * atomic swap/close (LU-6785) */ - rc = migrate_block(fd, fdv, &st, buf_size, name); + /* mirrors should be of the same size */ + if (stbuf.st_size != stbuf_v.st_size) { + error_loc = "file sizes don't match"; + rc = -EINVAL; + goto out; } -error: - if (have_lease_rdlck) - llapi_lease_put(fd); + rc = llapi_lease_get(fd, LL_LEASE_RDLCK); + if (rc < 0) { + error_loc = "cannot get lease"; + goto out; + } + + /* Make sure we keep original atime/mtime values */ + rc = migrate_copy_timestamps(fd, fdv); + + /* Atomically put lease, swap layouts and close. + * for a migration we need to check data version on file did + * not change. */ + rc = llapi_fswap_layouts(fd, fdv, 0, 0, MERGE_LAYOUTS_CLOSE); + if (rc < 0) { + error_loc = "cannot swap layout"; + goto out; + } +out: if (fd >= 0) close(fd); if (fdv >= 0) close(fdv); -free: - if (lum) - free(lum); - + if (rc < 0) + fprintf(stderr, "error: %s: %s: %s: %s\n", + progname, fname, error_loc, strerror(-rc)); return rc; } @@ -1365,6 +1426,8 @@ static int lfs_setstripe(int argc, char **argv) int comp_add = 0; __u32 comp_id = 0; struct llapi_layout *layout = NULL; + bool create_mirror = false; + const char *mirror_file = NULL; struct option long_opts[] = { /* --block is only valid in migrate mode */ @@ -1404,6 +1467,7 @@ static int lfs_setstripe(int argc, char **argv) { .val = 'm', .name = "mdt", .has_arg = required_argument}, { .val = 'm', .name = "mdt-index", .has_arg = required_argument}, { .val = 'm', .name = "mdt_index", .has_arg = required_argument}, + { .val = 'M', .name = "mirror", .has_arg = optional_argument}, /* --non-block is only valid in migrate mode */ { .val = 'n', .name = "non-block", .has_arg = no_argument}, { .val = 'o', .name = "ost", .has_arg = required_argument}, @@ -1478,6 +1542,15 @@ static int lfs_setstripe(int argc, char **argv) /* delete the default striping pattern */ delete = 
1; break; + case 'M': + if (create_mirror) { + fprintf(stderr, "error: %s: --mirror can only " + "be specfied once", argv[0]); + goto error; + } + create_mirror = true; + mirror_file = optarg; + break; case 'E': if (lsa.lsa_comp_end != 0) { result = comp_args_to_layout(&layout, &lsa); @@ -1614,21 +1687,34 @@ static int lfs_setstripe(int argc, char **argv) fname = argv[optind]; - if (lsa.lsa_comp_end != 0) { - result = comp_args_to_layout(&layout, &lsa); - if (result) { - fprintf(stderr, "%s %s: invalid component layout\n", - progname, argv[0]); - goto usage_error; - } - } - if (optind == argc) { fprintf(stderr, "%s %s: FILE must be specified\n", progname, argv[0]); goto usage_error; } + if (create_mirror) { + if (!comp_add) { + fprintf(stderr, "error: %s: --component-add must be " + "specified with --mirror option\n", argv[0]); + goto error; + } + if (lsa.lsa_comp_end == 0) + lsa.lsa_comp_end = LUSTRE_EOF; + if (lsa.lsa_comp_end != LUSTRE_EOF) { + fprintf(stderr, + "error: %s: creating non-eof ending mirror\n", + argv[0]); + goto error; + } + } + + if (lsa.lsa_comp_end != 0) { + result = comp_args_to_layout(&layout, &lsa); + if (result) + goto error; + } + /* Only LCME_FL_INIT flags is used in PFL, and it shouldn't be * altered by user space tool, so we don't need to support the * --component-set for this moment. 
*/ @@ -1687,11 +1773,13 @@ static int lfs_setstripe(int argc, char **argv) progname, argv[0]); goto usage_error; } - result = adjust_first_extent(fname, layout); - if (result == -ENODATA) - comp_add = 0; - else if (result != 0) - goto error; + if (!create_mirror) { + result = adjust_first_extent(fname, layout); + if (result == -ENODATA) + comp_add = 0; + else if (result != 0) + goto error; + } } if (mdt_idx_arg != NULL && optind > 3) { @@ -1772,7 +1860,11 @@ static int lfs_setstripe(int argc, char **argv) result = lfs_component_del(fname, comp_id, lsa.lsa_comp_flags); } else if (comp_add != 0) { - result = lfs_component_add(fname, layout); + if (create_mirror) + result = lfs_create_mirror(fname, layout, + mirror_file); + else + result = lfs_component_add(fname, layout); } else if (layout != NULL) { result = lfs_component_create(fname, O_CREAT | O_WRONLY, 0644, layout); diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index e6008a9..ccf9e8c 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -2591,24 +2591,33 @@ static void lov_dump_comp_v1_header(struct find_param *param, char *path, if (verbose & VERBOSE_DETAIL) { llapi_printf(LLAPI_MSG_NORMAL, "composite_header:\n"); - llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic: 0x%08X\n", + llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic: 0x%08X\n", " ", comp_v1->lcm_magic); - llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size: %u\n", + llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size: %u\n", " ", comp_v1->lcm_size); - llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_flags: %u\n", + llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_flags: %u\n", " ", comp_v1->lcm_flags); } if (verbose & VERBOSE_GENERATION) { if (verbose & ~VERBOSE_GENERATION) - llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen: ", + llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen: ", " "); llapi_printf(LLAPI_MSG_NORMAL, "%u\n", comp_v1->lcm_layout_gen); } + if (verbose & VERBOSE_MIRROR_COUNT) { + if (verbose & ~VERBOSE_MIRROR_COUNT) + 
llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_mirror_count: ", + " "); + llapi_printf(LLAPI_MSG_NORMAL, "%u\n", + comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ? + comp_v1->lcm_mirror_count + 1 : 1); + } + if (verbose & VERBOSE_COMP_COUNT) { if (verbose & ~VERBOSE_COMP_COUNT) - llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count: ", + llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count: ", " "); llapi_printf(LLAPI_MSG_NORMAL, "%u\n", comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ? diff --git a/lustre/utils/liblustreapi_layout.c b/lustre/utils/liblustreapi_layout.c index f6e477c..5f474b8 100644 --- a/lustre/utils/liblustreapi_layout.c +++ b/lustre/utils/liblustreapi_layout.c @@ -515,8 +515,9 @@ llapi_layout_to_lum(const struct llapi_layout *layout) comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1; comp_v1->lcm_size = lum_size; comp_v1->lcm_layout_gen = 0; - comp_v1->lcm_flags = 0; + comp_v1->lcm_flags = layout->llot_flags; comp_v1->lcm_entry_count = comp_cnt; + comp_v1->lcm_mirror_count = 0; offset += lum_size; } @@ -1511,6 +1512,20 @@ int llapi_layout_file_create(const char *path, int open_flags, int mode, } /** + * Set flags to the header of a component layout. + */ +int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags) +{ + if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) { + errno = EINVAL; + return -1; + } + + layout->llot_flags = flags; + return 0; +} + +/** * Fetch the start and end offset of the current layout component. 
* * \param[in] layout the layout component diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 5791315..3c42583 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -782,6 +782,7 @@ check_lov_comp_md_entry_v1(void) CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding); CHECK_VALUE_X(LCME_FL_INIT); + CHECK_VALUE_X(LCME_FL_NEG); } static void @@ -794,11 +795,17 @@ check_lov_comp_md_v1(void) CHECK_MEMBER(lov_comp_md_v1, lcm_layout_gen); CHECK_MEMBER(lov_comp_md_v1, lcm_flags); CHECK_MEMBER(lov_comp_md_v1, lcm_entry_count); + CHECK_MEMBER(lov_comp_md_v1, lcm_mirror_count); CHECK_MEMBER(lov_comp_md_v1, lcm_padding1); CHECK_MEMBER(lov_comp_md_v1, lcm_padding2); CHECK_MEMBER(lov_comp_md_v1, lcm_entries[0]); CHECK_CDEFINE(LOV_MAGIC_COMP_V1); + + CHECK_VALUE(LCM_FL_NOT_FLR); + CHECK_VALUE(LCM_FL_RDONLY); + CHECK_VALUE(LCM_FL_WRITE_PENDING); + CHECK_VALUE(LCM_FL_SYNC_PENDING); } static void diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index f62a0b0..54fdbf5 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1724,6 +1724,8 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding)); LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n", (unsigned)LCME_FL_INIT); + LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n", + (unsigned)LCME_FL_NEG); /* Checks for struct lov_comp_md_v1 */ LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n", @@ -1748,9 +1750,13 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count)); LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count)); - LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 16, "found %lld\n", + LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_mirror_count) == 16, "found %lld\n", + (long long)(int)offsetof(struct 
lov_comp_md_v1, lcm_mirror_count)); + LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count)); + LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 18, "found %lld\n", (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1)); - LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 8, "found %lld\n", + LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 6, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1)); LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n", (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2)); @@ -1761,6 +1767,14 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0])); CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0)); + LASSERTF(LCM_FL_NOT_FLR == 0, "found %lld\n", + (long long)LCM_FL_NOT_FLR); + LASSERTF(LCM_FL_RDONLY == 1, "found %lld\n", + (long long)LCM_FL_RDONLY); + LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n", + (long long)LCM_FL_WRITE_PENDING); + LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n", + (long long)LCM_FL_SYNC_PENDING); /* Checks for struct lmv_mds_md_v1 */ LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",