Whamcloud - gitweb
LU-9771 flr: lfs setstripe to create a new mirror 83/29083/11
authorJinshan Xiong <jinshan.xiong@intel.com>
Fri, 15 Sep 2017 20:01:58 +0000 (20:01 +0000)
committerJinshan Xiong <jinshan.xiong@intel.com>
Thu, 16 Nov 2017 07:42:03 +0000 (07:42 +0000)
To create a mirrored file:

1. Create a component file
  lfs setstripe -E -1 /mnt/lustre/tf

2. Add a synced mirror - create a mirror with specified layout
  lfs setstripe --component-add [--mirror[=victim_file]]
    [setstripe options] <file>

  If victim_file exists, its layout is split off from that file and
  merged in as the new mirror; otherwise, a new mirror is created
  with the stripe options given in 'setstripe options'.

3. [todo] Support creating a mirrored file directly with
  lfs setstripe --mirror [setstripe options] --mirror [options] <file>
  The flag 'LCM_USER_FL_MIRROR' is reserved for this purpose.

Test-Parameters: testlist=sanity-flr
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Change-Id: I470feeb1a77554bd2c990e94d8538fd3d03d7b3b
Reviewed-on: https://review.whamcloud.com/29083
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
26 files changed:
lustre/include/lu_object.h
lustre/include/lustre/lustreapi.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/file.c
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_object.c
lustre/lod/lod_qos.c
lustre/lov/lov_ea.c
lustre/lov/lov_internal.h
lustre/lov/lov_pack.c
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_request.c
lustre/mdd/mdd_object.c
lustre/mdt/mdt_lib.c
lustre/mdt/mdt_open.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/wiretest.c
lustre/tests/Makefile.am
lustre/tests/sanity-flr.sh [new file with mode: 0644]
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_layout.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index af57228..50bad11 100644 (file)
@@ -907,7 +907,8 @@ struct lu_rdpg {
 
 enum lu_xattr_flags {
        LU_XATTR_REPLACE = (1 << 0),
 
 enum lu_xattr_flags {
        LU_XATTR_REPLACE = (1 << 0),
-       LU_XATTR_CREATE  = (1 << 1)
+       LU_XATTR_CREATE  = (1 << 1),
+       LU_XATTR_MERGE   = (1 << 2),
 };
 
 /** @} helpers */
 };
 
 /** @} helpers */
index 79f2f17..1ae9361 100644 (file)
@@ -148,13 +148,14 @@ int llapi_file_lookup(int dirfd, const char *name);
 #define VERBOSE_COMP_ID                0x2000
 #define VERBOSE_DFID           0x4000
 #define VERBOSE_HASH_TYPE      0x8000
 #define VERBOSE_COMP_ID                0x2000
 #define VERBOSE_DFID           0x4000
 #define VERBOSE_HASH_TYPE      0x8000
+#define VERBOSE_MIRROR_COUNT   0x10000
 #define VERBOSE_DEFAULT                (VERBOSE_COUNT | VERBOSE_SIZE | \
                                 VERBOSE_OFFSET | VERBOSE_POOL | \
                                 VERBOSE_OBJID | VERBOSE_GENERATION | \
                                 VERBOSE_LAYOUT | VERBOSE_HASH_TYPE | \
                                 VERBOSE_COMP_COUNT | VERBOSE_COMP_FLAGS | \
                                 VERBOSE_COMP_START | VERBOSE_COMP_END | \
 #define VERBOSE_DEFAULT                (VERBOSE_COUNT | VERBOSE_SIZE | \
                                 VERBOSE_OFFSET | VERBOSE_POOL | \
                                 VERBOSE_OBJID | VERBOSE_GENERATION | \
                                 VERBOSE_LAYOUT | VERBOSE_HASH_TYPE | \
                                 VERBOSE_COMP_COUNT | VERBOSE_COMP_FLAGS | \
                                 VERBOSE_COMP_START | VERBOSE_COMP_END | \
-                                VERBOSE_COMP_ID)
+                                VERBOSE_COMP_ID | VERBOSE_MIRROR_COUNT)
 
 struct find_param {
        unsigned int             fp_max_depth;
 
 struct find_param {
        unsigned int             fp_max_depth;
@@ -722,6 +723,11 @@ int llapi_layout_file_create(const char *path, int open_flags, int mode,
                             const struct llapi_layout *layout);
 
 /**
                             const struct llapi_layout *layout);
 
 /**
+ * Set flags to the header of component layout.
+ */
+int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags);
+
+/**
  * Fetch the start and end offset of the current layout component.
  */
 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
  * Fetch the start and end offset of the current layout component.
  */
 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
index 9635941..eaf53bd 100644 (file)
@@ -1875,8 +1875,12 @@ enum mds_op_bias {
        MDS_HSM_RELEASE         = 1 << 12,
        MDS_RENAME_MIGRATE      = 1 << 13,
        MDS_CLOSE_LAYOUT_SWAP   = 1 << 14,
        MDS_HSM_RELEASE         = 1 << 12,
        MDS_RENAME_MIGRATE      = 1 << 13,
        MDS_CLOSE_LAYOUT_SWAP   = 1 << 14,
+       MDS_CLOSE_LAYOUT_MERGE  = 1 << 15,
 };
 
 };
 
+#define MDS_CLOSE_INTENT (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |    \
+                         MDS_CLOSE_LAYOUT_MERGE)
+
 /* instance of mdt_reint_rec */
 struct mdt_rec_create {
         __u32           cr_opcode;
 /* instance of mdt_reint_rec */
 struct mdt_rec_create {
         __u32           cr_opcode;
index f4adad5..cf3c908 100644 (file)
@@ -522,6 +522,11 @@ static inline bool lu_extent_is_overlapped(struct lu_extent *e1,
        return e1->e_start < e2->e_end && e2->e_start < e1->e_end;
 }
 
        return e1->e_start < e2->e_end && e2->e_start < e1->e_end;
 }
 
+/* Return true if extent \a e starts at offset 0 and ends at LUSTRE_EOF. */
+static inline bool lu_extent_is_whole(struct lu_extent *e)
+{
+       if (e->e_start != 0)
+               return false;
+
+       return e->e_end == LUSTRE_EOF;
+}
+
 enum lov_comp_md_entry_flags {
        LCME_FL_PRIMARY = 0x00000001,   /* Not used */
        LCME_FL_STALE   = 0x00000002,   /* Not used */
 enum lov_comp_md_entry_flags {
        LCME_FL_PRIMARY = 0x00000001,   /* Not used */
        LCME_FL_STALE   = 0x00000002,   /* Not used */
@@ -557,7 +562,33 @@ struct lov_comp_md_entry_v1 {
        __u64                   lcme_padding[2];
 } __attribute__((packed));
 
        __u64                   lcme_padding[2];
 } __attribute__((packed));
 
-enum lov_comp_md_flags;
+#define SEQ_ID_MAX             0x0000FFFF
+#define SEQ_ID_MASK            SEQ_ID_MAX
+/* bit 30:16 of lcme_id is used to store mirror id */
+#define MIRROR_ID_MASK         0x7FFF0000
+#define MIRROR_ID_SHIFT                16
+
+/**
+ * Compose a component id from a mirror id and a sequence id.
+ *
+ * \a mirror_id is placed in the bits selected by MIRROR_ID_MASK and
+ * \a seqid is OR-ed into the low bits; mirror id bits that fall outside
+ * MIRROR_ID_MASK after the shift are dropped.
+ */
+static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
+{
+       /* Widen before shifting: a __u16 is promoted to signed int, and
+        * shifting a value >= 0x8000 left by 16 would overflow it. */
+       return (((__u32)mirror_id << MIRROR_ID_SHIFT) & MIRROR_ID_MASK) |
+               seqid;
+}
+
+/* Extract the mirror id held in the MIRROR_ID_MASK bits of \a id. */
+static inline __u16 mirror_id_of(__u32 id)
+{
+       __u32 mirror_bits = id & MIRROR_ID_MASK;
+
+       return mirror_bits >> MIRROR_ID_SHIFT;
+}
+
+/**
+ * On-disk values for lcm_flags. Valid if lcm_magic is LOV_MAGIC_COMP_V1.
+ *
+ * Values 0-3 are states stored in the field selected by LCM_FL_FLR_MASK,
+ * not independent bit flags: extract the state with
+ * (lcm_flags & LCM_FL_FLR_MASK) and compare it against one of them.
+ */
+enum lov_comp_md_flags {
+       /* the two least-significant bits are used by FLR to record the
+        * file state */
+       LCM_FL_NOT_FLR          = 0,
+       LCM_FL_RDONLY           = 1,
+       LCM_FL_WRITE_PENDING    = 2,
+       LCM_FL_SYNC_PENDING     = 3,
+       /* mask covering the FLR state bits above */
+       LCM_FL_FLR_MASK         = 0x3,
+};
 
 struct lov_comp_md_v1 {
        __u32   lcm_magic;      /* LOV_USER_MAGIC_COMP_V1 */
 
 struct lov_comp_md_v1 {
        __u32   lcm_magic;      /* LOV_USER_MAGIC_COMP_V1 */
@@ -565,11 +596,19 @@ struct lov_comp_md_v1 {
        __u32   lcm_layout_gen;
        __u16   lcm_flags;
        __u16   lcm_entry_count;
        __u32   lcm_layout_gen;
        __u16   lcm_flags;
        __u16   lcm_entry_count;
-       __u64   lcm_padding1;
+       /* lcm_mirror_count stores the number of actual mirrors minus 1,
+        * so that non-flr files will have value 0 meaning 1 mirror. */
+       __u16   lcm_mirror_count;
+       __u16   lcm_padding1[3];
        __u64   lcm_padding2;
        struct lov_comp_md_entry_v1 lcm_entries[0];
 } __attribute__((packed));
 
        __u64   lcm_padding2;
        struct lov_comp_md_entry_v1 lcm_entries[0];
 } __attribute__((packed));
 
+/*
+ * Maximum number of mirrors Lustre can support.
+ */
+#define LUSTRE_MIRROR_COUNT_MAX                16
+
 static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
 {
        if (stripes == (__u16)-1)
 static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
 {
        if (stripes == (__u16)-1)
@@ -857,6 +896,8 @@ struct if_quotactl {
 #define SWAP_LAYOUTS_KEEP_MTIME                (1 << 2)
 #define SWAP_LAYOUTS_KEEP_ATIME                (1 << 3)
 #define SWAP_LAYOUTS_CLOSE             (1 << 4)
 #define SWAP_LAYOUTS_KEEP_MTIME                (1 << 2)
 #define SWAP_LAYOUTS_KEEP_ATIME                (1 << 3)
 #define SWAP_LAYOUTS_CLOSE             (1 << 4)
+#define MERGE_LAYOUTS_CLOSE            (1 << 5)
+#define INTENT_LAYOUTS_CLOSE   (SWAP_LAYOUTS_CLOSE | MERGE_LAYOUTS_CLOSE)
 
 /* Swap XATTR_NAME_HSM as well, only on the MDT so far */
 #define SWAP_LAYOUTS_MDS_HSM           (1 << 31)
 
 /* Swap XATTR_NAME_HSM as well, only on the MDT so far */
 #define SWAP_LAYOUTS_MDS_HSM           (1 << 31)
index f2a7f1d..701b647 100644 (file)
@@ -144,9 +144,10 @@ static int ll_close_inode_openhandle(struct inode *inode,
 
        ll_prepare_close(inode, op_data, och);
        switch (bias) {
 
        ll_prepare_close(inode, op_data, och);
        switch (bias) {
+       case MDS_CLOSE_LAYOUT_MERGE:
        case MDS_CLOSE_LAYOUT_SWAP:
                LASSERT(data != NULL);
        case MDS_CLOSE_LAYOUT_SWAP:
                LASSERT(data != NULL);
-               op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
+               op_data->op_bias |= bias;
                op_data->op_data_version = 0;
                op_data->op_lease_handle = och->och_lease_handle;
                op_data->op_fid2 = *ll_inode2fid(data);
                op_data->op_data_version = 0;
                op_data->op_lease_handle = och->och_lease_handle;
                op_data->op_fid2 = *ll_inode2fid(data);
@@ -170,8 +171,7 @@ static int ll_close_inode_openhandle(struct inode *inode,
                CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
                       md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
 
                CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
                       md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
 
-       if (rc == 0 &&
-           op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
+       if (rc == 0 && op_data->op_bias & bias) {
                struct mdt_body *body;
 
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                struct mdt_body *body;
 
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
@@ -914,10 +914,12 @@ static int ll_check_swap_layouts_validity(struct inode *inode1,
 }
 
 static int ll_swap_layouts_close(struct obd_client_handle *och,
 }
 
 static int ll_swap_layouts_close(struct obd_client_handle *och,
-                                struct inode *inode, struct inode *inode2)
+                                struct inode *inode, struct inode *inode2,
+                                int intent)
 {
        const struct lu_fid     *fid1 = ll_inode2fid(inode);
        const struct lu_fid     *fid2;
 {
        const struct lu_fid     *fid1 = ll_inode2fid(inode);
        const struct lu_fid     *fid2;
+       enum mds_op_bias         bias;
        int                      rc;
        ENTRY;
 
        int                      rc;
        ENTRY;
 
@@ -935,11 +937,21 @@ static int ll_swap_layouts_close(struct obd_client_handle *och,
        if (rc == 0)
                GOTO(out_free_och, rc = -EINVAL);
 
        if (rc == 0)
                GOTO(out_free_och, rc = -EINVAL);
 
-       /* Close the file and swap layouts between inode & inode2.
+       switch (intent) {
+       case SWAP_LAYOUTS_CLOSE:
+               bias = MDS_CLOSE_LAYOUT_SWAP;
+               break;
+       case MERGE_LAYOUTS_CLOSE:
+               bias = MDS_CLOSE_LAYOUT_MERGE;
+               break;
+       default:
+               GOTO(out_free_och, rc = -EOPNOTSUPP);
+       }
+
+       /* Close the file and {swap,merge} layouts between inode & inode2.
         * NB: lease lock handle is released in mdc_close_layout_swap_pack()
         * because we still need it to pack l_remote_handle to MDT. */
         * NB: lease lock handle is released in mdc_close_layout_swap_pack()
         * because we still need it to pack l_remote_handle to MDT. */
-       rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
-                                      inode2);
+       rc = ll_close_inode_openhandle(inode, och, bias, inode2);
 
        och = NULL; /* freed in ll_close_inode_openhandle() */
 
 
        och = NULL; /* freed in ll_close_inode_openhandle() */
 
@@ -2783,6 +2795,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case LL_IOC_LOV_SWAP_LAYOUTS: {
                struct file *file2;
                struct lustre_swap_layouts lsl;
        case LL_IOC_LOV_SWAP_LAYOUTS: {
                struct file *file2;
                struct lustre_swap_layouts lsl;
+               __u64 intent;
 
                if (copy_from_user(&lsl, (char __user *)arg,
                                   sizeof(struct lustre_swap_layouts)))
 
                if (copy_from_user(&lsl, (char __user *)arg,
                                   sizeof(struct lustre_swap_layouts)))
@@ -2799,14 +2812,12 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
                        GOTO(out, rc = -EPERM);
 
                if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
                        GOTO(out, rc = -EPERM);
 
-               if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
+               intent = lsl.sl_flags & INTENT_LAYOUTS_CLOSE;
+               if (intent) {
                        struct inode                    *inode2;
                        struct ll_inode_info            *lli;
                        struct obd_client_handle        *och = NULL;
 
                        struct inode                    *inode2;
                        struct ll_inode_info            *lli;
                        struct obd_client_handle        *och = NULL;
 
-                       if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
-                               GOTO(out, rc = -EINVAL);
-
                        lli = ll_i2info(inode);
                        mutex_lock(&lli->lli_och_mutex);
                        if (fd->fd_lease_och != NULL) {
                        lli = ll_i2info(inode);
                        mutex_lock(&lli->lli_och_mutex);
                        if (fd->fd_lease_och != NULL) {
@@ -2817,7 +2828,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        if (och == NULL)
                                GOTO(out, rc = -ENOLCK);
                        inode2 = file_inode(file2);
                        if (och == NULL)
                                GOTO(out, rc = -ENOLCK);
                        inode2 = file_inode(file2);
-                       rc = ll_swap_layouts_close(och, inode, inode2);
+                       rc = ll_swap_layouts_close(och, inode, inode2, intent);
                } else {
                        rc = ll_swap_layouts(file, file2, &lsl);
                }
                } else {
                        rc = ll_swap_layouts(file, file2, &lsl);
                }
index b0dfdbf..fe7d21b 100644 (file)
@@ -274,7 +274,9 @@ struct lod_object {
                        /* Layout component count for a regular file.
                         * It equals to 1 for non-composite layout. */
                        __u16           ldo_comp_cnt;
                        /* Layout component count for a regular file.
                         * It equals to 1 for non-composite layout. */
                        __u16           ldo_comp_cnt;
+                       __u16           ldo_mirror_count;
                        __u32           ldo_is_composite:1,
                        __u32           ldo_is_composite:1,
+                                       ldo_flr_state:2,
                                        ldo_comp_cached:1;
                };
                /* directory stripe (LMV) */
                                        ldo_comp_cached:1;
                };
                /* directory stripe (LMV) */
@@ -598,8 +600,8 @@ int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
                           const struct lu_buf *buf);
 int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo,
                           struct lov_ost_data_v1 *objs, int index);
                           const struct lu_buf *buf);
 int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo,
                           struct lov_ost_data_v1 *objs, int index);
-int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
-                       bool is_from_disk, __u64 start);
+int lod_verify_striping(struct lod_device *d, struct lod_object *lo,
+                       const struct lu_buf *buf, bool is_from_disk);
 int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo,
                       struct lov_mds_md *lmm, int *lmm_size, bool is_dir);
 int lod_ea_store_resize(struct lod_thread_info *info, size_t size);
 int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo,
                       struct lov_mds_md *lmm, int *lmm_size, bool is_dir);
 int lod_ea_store_resize(struct lod_thread_info *info, size_t size);
index f5df58d..1a3c9df 100644 (file)
@@ -943,6 +943,8 @@ int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo,
        lcm = (struct lov_comp_md_v1 *)lmm;
        lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1);
        lcm->lcm_entry_count = cpu_to_le16(comp_cnt);
        lcm = (struct lov_comp_md_v1 *)lmm;
        lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1);
        lcm->lcm_entry_count = cpu_to_le16(comp_cnt);
+       lcm->lcm_mirror_count = cpu_to_le16(lo->ldo_mirror_count);
+       lcm->lcm_flags = cpu_to_le16(lo->ldo_flr_state);
 
        offset = sizeof(*lcm) + sizeof(*lcme) * comp_cnt;
        LASSERT(offset % sizeof(__u64) == 0);
 
        offset = sizeof(*lcm) + sizeof(*lcme) * comp_cnt;
        LASSERT(offset % sizeof(__u64) == 0);
@@ -1221,6 +1223,9 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo,
                        GOTO(out, rc = -EINVAL);
                lo->ldo_layout_gen = le32_to_cpu(comp_v1->lcm_layout_gen);
                lo->ldo_is_composite = 1;
                        GOTO(out, rc = -EINVAL);
                lo->ldo_layout_gen = le32_to_cpu(comp_v1->lcm_layout_gen);
                lo->ldo_is_composite = 1;
+               lo->ldo_flr_state = le16_to_cpu(comp_v1->lcm_flags) &
+                                       LCM_FL_FLR_MASK;
+               lo->ldo_mirror_count = le16_to_cpu(comp_v1->lcm_mirror_count);
        } else {
                comp_cnt = 1;
                lo->ldo_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
        } else {
                comp_cnt = 1;
                lo->ldo_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
@@ -1268,9 +1273,10 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo,
 
                if (magic == LOV_MAGIC_V3) {
                        struct lov_mds_md_v3 *v3 = (struct lov_mds_md_v3 *)lmm;
 
                if (magic == LOV_MAGIC_V3) {
                        struct lov_mds_md_v3 *v3 = (struct lov_mds_md_v3 *)lmm;
+                       lod_set_pool(&lod_comp->llc_pool, v3->lmm_pool_name);
                        objs = &v3->lmm_objects[0];
                        objs = &v3->lmm_objects[0];
-                       /* no need to set pool, which is used in create only */
                } else {
                } else {
+                       lod_set_pool(&lod_comp->llc_pool, NULL);
                        objs = &lmm->lmm_objects[0];
                }
 
                        objs = &lmm->lmm_objects[0];
                }
 
@@ -1623,8 +1629,8 @@ out:
  * \retval                     0 if the striping is valid
  * \retval                     -EINVAL if striping is invalid
  */
  * \retval                     0 if the striping is valid
  * \retval                     -EINVAL if striping is invalid
  */
-int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
-                       bool is_from_disk, __u64 start)
+int lod_verify_striping(struct lod_device *d, struct lod_object *lo,
+                       const struct lu_buf *buf, bool is_from_disk)
 {
        struct lov_user_md_v1   *lum;
        struct lov_comp_md_v1   *comp_v1;
 {
        struct lov_user_md_v1   *lum;
        struct lov_comp_md_v1   *comp_v1;
@@ -1655,8 +1661,8 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                struct lu_extent        *ext;
                struct lov_desc *desc = &d->lod_desc;
                struct lu_buf   tmp;
                struct lu_extent        *ext;
                struct lov_desc *desc = &d->lod_desc;
                struct lu_buf   tmp;
+               __u64   prev_end = 0;
                __u32   stripe_size = 0;
                __u32   stripe_size = 0;
-               __u64   prev_end = start;
 
                comp_v1 = buf->lb_buf;
                if (buf->lb_len < le32_to_cpu(comp_v1->lcm_size)) {
 
                comp_v1 = buf->lb_buf;
                if (buf->lb_len < le32_to_cpu(comp_v1->lcm_size)) {
@@ -1670,6 +1676,14 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                        RETURN(-EINVAL);
                }
 
                        RETURN(-EINVAL);
                }
 
+               if (S_ISREG(lod2lu_obj(lo)->lo_header->loh_attr) &&
+                   lo->ldo_comp_cnt > 0) {
+                       __u32 cnt = lo->ldo_comp_cnt;
+
+                       ext = &lo->ldo_comp_entries[cnt - 1].llc_extent;
+                       prev_end = ext->e_end;
+               }
+
                for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
                        ent = &comp_v1->lcm_entries[i];
                        ext = &ent->lcme_extent;
                for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
                        ent = &comp_v1->lcm_entries[i];
                        ext = &ent->lcme_extent;
@@ -1699,6 +1713,7 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                                       le64_to_cpu(ext->e_start), prev_end);
                                RETURN(-EINVAL);
                        }
                                       le64_to_cpu(ext->e_start), prev_end);
                                RETURN(-EINVAL);
                        }
+
                        prev_end = le64_to_cpu(ext->e_end);
 
                        tmp.lb_buf = (char *)comp_v1 +
                        prev_end = le64_to_cpu(ext->e_end);
 
                        tmp.lb_buf = (char *)comp_v1 +
index 6fbedf0..ef65469 100644 (file)
@@ -2017,7 +2017,7 @@ static int lod_dir_declare_xattr_set(const struct lu_env *env,
                if (rc != 0)
                        RETURN(rc);
        } else if (strcmp(name, XATTR_NAME_LOV) == 0) {
                if (rc != 0)
                        RETURN(rc);
        } else if (strcmp(name, XATTR_NAME_LOV) == 0) {
-               rc = lod_verify_striping(d, buf, false, 0);
+               rc = lod_verify_striping(d, lo, buf, false);
                if (rc != 0)
                        RETURN(rc);
        }
                if (rc != 0)
                        RETURN(rc);
        }
@@ -2226,14 +2226,12 @@ static int lod_declare_layout_add(const struct lu_env *env,
        struct lov_user_md_v3   *v3;
        struct lov_comp_md_v1   *comp_v1 = buf->lb_buf;
        __u32   magic;
        struct lov_user_md_v3   *v3;
        struct lov_comp_md_v1   *comp_v1 = buf->lb_buf;
        __u32   magic;
-       __u64   prev_end;
        int     i, rc, array_cnt;
        ENTRY;
 
        LASSERT(lo->ldo_is_composite);
 
        int     i, rc, array_cnt;
        ENTRY;
 
        LASSERT(lo->ldo_is_composite);
 
-       prev_end = lo->ldo_comp_entries[lo->ldo_comp_cnt - 1].llc_extent.e_end;
-       rc = lod_verify_striping(d, buf, false, prev_end);
+       rc = lod_verify_striping(d, lo, buf, false);
        if (rc != 0)
                RETURN(rc);
 
        if (rc != 0)
                RETURN(rc);
 
@@ -2266,6 +2264,7 @@ static int lod_declare_layout_add(const struct lu_env *env,
                lod_comp->llc_extent.e_start = ext->e_start;
                lod_comp->llc_extent.e_end = ext->e_end;
                lod_comp->llc_stripe_offset = v1->lmm_stripe_offset;
                lod_comp->llc_extent.e_start = ext->e_start;
                lod_comp->llc_extent.e_end = ext->e_end;
                lod_comp->llc_stripe_offset = v1->lmm_stripe_offset;
+               lod_comp->llc_flags = comp_v1->lcm_entries[i].lcme_flags;
 
                lod_comp->llc_stripe_count = v1->lmm_stripe_count;
                if (!lod_comp->llc_stripe_count ||
 
                lod_comp->llc_stripe_count = v1->lmm_stripe_count;
                if (!lod_comp->llc_stripe_count ||
@@ -2291,6 +2290,7 @@ static int lod_declare_layout_add(const struct lu_env *env,
        OBD_FREE(lo->ldo_comp_entries, sizeof(*lod_comp) * lo->ldo_comp_cnt);
        lo->ldo_comp_entries = comp_array;
        lo->ldo_comp_cnt = array_cnt;
        OBD_FREE(lo->ldo_comp_entries, sizeof(*lod_comp) * lo->ldo_comp_cnt);
        lo->ldo_comp_entries = comp_array;
        lo->ldo_comp_cnt = array_cnt;
+
        /* No need to increase layout generation here, it will be increased
         * later when generating component ID for the new components */
 
        /* No need to increase layout generation here, it will be increased
         * later when generating component ID for the new components */
 
@@ -2422,10 +2422,6 @@ static int lod_declare_layout_del(const struct lu_env *env,
 
        LASSERT(lo->ldo_is_composite);
 
 
        LASSERT(lo->ldo_is_composite);
 
-       rc = lod_verify_striping(d, buf, false, 0);
-       if (rc != 0)
-               RETURN(rc);
-
        magic = comp_v1->lcm_magic;
        if (magic == __swab32(LOV_USER_MAGIC_COMP_V1)) {
                lustre_swab_lov_comp_md_v1(comp_v1);
        magic = comp_v1->lcm_magic;
        if (magic == __swab32(LOV_USER_MAGIC_COMP_V1)) {
                lustre_swab_lov_comp_md_v1(comp_v1);
@@ -2592,6 +2588,139 @@ unlock:
 }
 
 /**
 }
 
 /**
+ * Merge layouts to form a mirrored file.
+ *
+ * Build a new composite layout holding the components of the object's
+ * current LOV EA followed by the components of the layout in \a mbuf.
+ * The merged-in components get component ids carrying a new mirror id.
+ * The object's in-memory striping is then re-parsed from the new layout
+ * and the replacement of XATTR_NAME_LOV is declared on the child object.
+ *
+ * The caller must hold the write lock on the child object.
+ *
+ * \param[in] env      execution environment
+ * \param[in] dt       object whose layout receives the new mirror
+ * \param[in] mbuf     buffer with the LOV_MAGIC_COMP_V1 layout to merge;
+ *                     the layout must not have mirrors of its own
+ * \param[in] th       transaction handle
+ *
+ * \retval             0 on success
+ * \retval             -EINVAL if \a mbuf is shorter than the layout it
+ *                     claims to hold, or if either layout is not
+ *                     LOV_MAGIC_COMP_V1
+ * \retval             -EBUSY if the layout in \a mbuf is already mirrored
+ * \retval             -ENODATA if the object has no LOV EA
+ * \retval             -ERANGE if the result would have more than
+ *                     LUSTRE_MIRROR_COUNT_MAX mirrors
+ * \retval             -ENOMEM if the new layout cannot be allocated
+ * \retval             negative errno returned by lod_get_lov_ea(),
+ *                     lod_parse_striping() or lod_sub_declare_xattr_set()
+ */
+static int lod_declare_layout_merge(const struct lu_env *env,
+               struct dt_object *dt, const struct lu_buf *mbuf,
+               struct thandle *th)
+{
+       struct lod_thread_info  *info = lod_env_info(env);
+       struct lu_buf           *buf = &info->lti_buf;
+       struct lod_object       *lo = lod_dt_obj(dt);
+       struct lov_comp_md_v1   *lcm;
+       struct lov_comp_md_v1   *cur_lcm;
+       struct lov_comp_md_v1   *merge_lcm;
+       struct lov_comp_md_entry_v1     *lcme;
+       size_t size = 0;
+       size_t offset;
+       __u16 cur_entry_count;
+       __u16 merge_entry_count;
+       __u32 id = 0;
+       __u16 mirror_id = 0;
+       __u32 mirror_count;
+       int     rc, i;
+       ENTRY;
+
+       merge_lcm = mbuf->lb_buf;
+       if (mbuf->lb_len < sizeof(*merge_lcm))
+               RETURN(-EINVAL);
+
+       /* must be an existing layout from disk */
+       if (le32_to_cpu(merge_lcm->lcm_magic) != LOV_MAGIC_COMP_V1)
+               RETURN(-EINVAL);
+
+       /* lcm_size is used below to size the new layout, so the buffer
+        * must be at least as long as the layout claims to be */
+       if (mbuf->lb_len < le32_to_cpu(merge_lcm->lcm_size))
+               RETURN(-EINVAL);
+
+       merge_entry_count = le16_to_cpu(merge_lcm->lcm_entry_count);
+
+       /* do not allow to merge two mirrored files */
+       if (le16_to_cpu(merge_lcm->lcm_mirror_count))
+               RETURN(-EBUSY);
+
+       /* verify the target buffer */
+       rc = lod_get_lov_ea(env, lo);
+       if (rc <= 0)
+               RETURN(rc ? : -ENODATA);
+
+       cur_lcm = info->lti_ea_store;
+       if (le32_to_cpu(cur_lcm->lcm_magic) != LOV_MAGIC_COMP_V1)
+               RETURN(-EINVAL);
+
+       cur_entry_count = le16_to_cpu(cur_lcm->lcm_entry_count);
+
+       /* 'lcm_mirror_count + 1' is the current # of mirrors the file has */
+       mirror_count = le16_to_cpu(cur_lcm->lcm_mirror_count) + 1;
+       if (mirror_count + 1 > LUSTRE_MIRROR_COUNT_MAX)
+               RETURN(-ERANGE);
+
+       /* size of new layout: both layouts, sharing a single header */
+       size = le32_to_cpu(cur_lcm->lcm_size) +
+              le32_to_cpu(merge_lcm->lcm_size) - sizeof(*cur_lcm);
+
+       memset(buf, 0, sizeof(*buf));
+       lu_buf_alloc(buf, size);
+       if (buf->lb_buf == NULL)
+               RETURN(-ENOMEM);
+
+       /* start from the current header and its entry table */
+       lcm = buf->lb_buf;
+       memcpy(lcm, cur_lcm, sizeof(*lcm) + cur_entry_count * sizeof(*lcme));
+
+       /* component blobs are laid out after the combined entry table */
+       offset = sizeof(*lcm) +
+                sizeof(*lcme) * (cur_entry_count + merge_entry_count);
+       for (i = 0; i < cur_entry_count; i++) {
+               struct lov_comp_md_entry_v1 *cur_lcme;
+
+               lcme = &lcm->lcm_entries[i];
+               cur_lcme = &cur_lcm->lcm_entries[i];
+
+               lcme->lcme_offset = cpu_to_le32(offset);
+               memcpy((char *)lcm + offset,
+                      (char *)cur_lcm + le32_to_cpu(cur_lcme->lcme_offset),
+                      le32_to_cpu(lcme->lcme_size));
+
+               offset += le32_to_cpu(lcme->lcme_size);
+
+               if (mirror_count == 1) {
+                       /* new mirrored file, create new mirror ID */
+                       id = pflr_id(1, i + 1);
+                       lcme->lcme_id = cpu_to_le32(id);
+               }
+
+               /* track the largest component id seen so far */
+               id = MAX(le32_to_cpu(lcme->lcme_id), id);
+       }
+
+       /* the new mirror takes the next mirror id after the largest one */
+       mirror_id = mirror_id_of(id) + 1;
+       for (i = 0; i < merge_entry_count; i++) {
+               struct lov_comp_md_entry_v1 *merge_lcme;
+
+               merge_lcme = &merge_lcm->lcm_entries[i];
+               lcme = &lcm->lcm_entries[cur_entry_count + i];
+
+               *lcme = *merge_lcme;
+               lcme->lcme_offset = cpu_to_le32(offset);
+
+               id = pflr_id(mirror_id, i + 1);
+               lcme->lcme_id = cpu_to_le32(id);
+
+               memcpy((char *)lcm + offset,
+                      (char *)merge_lcm + le32_to_cpu(merge_lcme->lcme_offset),
+                      le32_to_cpu(lcme->lcme_size));
+
+               offset += le32_to_cpu(lcme->lcme_size);
+       }
+
+       /* fixup layout information */
+       lod_obj_inc_layout_gen(lo);
+       lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen);
+       lcm->lcm_size = cpu_to_le32(size);
+       lcm->lcm_entry_count = cpu_to_le16(cur_entry_count + merge_entry_count);
+       lcm->lcm_mirror_count = cpu_to_le16(mirror_count);
+       /* lcm_flags is a 16-bit field, so it needs the 16-bit conversion;
+        * a 32-bit one would be truncated to 0 on big-endian hosts */
+       if ((le16_to_cpu(lcm->lcm_flags) & LCM_FL_FLR_MASK) == LCM_FL_NOT_FLR)
+               lcm->lcm_flags = cpu_to_le16(LCM_FL_RDONLY);
+
+       /* replace the in-memory striping with the merged layout */
+       LASSERT(dt_write_locked(env, dt_object_child(dt)));
+       lod_object_free_striping(env, lo);
+       rc = lod_parse_striping(env, lo, buf);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), buf,
+                                       XATTR_NAME_LOV, LU_XATTR_REPLACE, th);
+
+out:
+       lu_buf_free(buf);
+       RETURN(rc);
+}
+
+/**
  * Implementation of dt_object_operations::do_declare_xattr_set.
  *
  * \see dt_object_operations::do_declare_xattr_set() in the API description
  * Implementation of dt_object_operations::do_declare_xattr_set.
  *
  * \see dt_object_operations::do_declare_xattr_set() in the API description
@@ -2614,7 +2743,8 @@ static int lod_declare_xattr_set(const struct lu_env *env,
        ENTRY;
 
        mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
        ENTRY;
 
        mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
-       if ((S_ISREG(mode) || mode == 0) && !(fl & LU_XATTR_REPLACE) &&
+       if ((S_ISREG(mode) || mode == 0) &&
+           !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE)) &&
            (strcmp(name, XATTR_NAME_LOV) == 0 ||
             strcmp(name, XATTR_LUSTRE_LOV) == 0)) {
                /*
            (strcmp(name, XATTR_NAME_LOV) == 0 ||
             strcmp(name, XATTR_LUSTRE_LOV) == 0)) {
                /*
@@ -2636,6 +2766,10 @@ static int lod_declare_xattr_set(const struct lu_env *env,
                        attr->la_mode = S_IFREG;
                }
                rc = lod_declare_striped_create(env, dt, attr, buf, th);
                        attr->la_mode = S_IFREG;
                }
                rc = lod_declare_striped_create(env, dt, attr, buf, th);
+       } else if (fl & LU_XATTR_MERGE) {
+               LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 ||
+                       strcmp(name, XATTR_LUSTRE_LOV) == 0);
+               rc = lod_declare_layout_merge(env, dt, buf, th);
        } else if (S_ISREG(mode) &&
                   strlen(name) > strlen(XATTR_LUSTRE_LOV) + 1 &&
                   strncmp(name, XATTR_LUSTRE_LOV,
        } else if (S_ISREG(mode) &&
                   strlen(name) > strlen(XATTR_LUSTRE_LOV) + 1 &&
                   strncmp(name, XATTR_LUSTRE_LOV,
index fbb8111..01beb59 100644 (file)
@@ -1847,7 +1847,7 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo,
        if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0)
                RETURN(0);
 
        if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0)
                RETURN(0);
 
-       rc = lod_verify_striping(d, buf, false, 0);
+       rc = lod_verify_striping(d, lo, buf, false);
        if (rc)
                RETURN(-EINVAL);
 
        if (rc)
                RETURN(-EINVAL);
 
index 893659c..3ee9763 100644 (file)
@@ -441,6 +441,7 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
        lsm->lsm_magic = le32_to_cpu(lcm->lcm_magic);
        lsm->lsm_layout_gen = le32_to_cpu(lcm->lcm_layout_gen);
        lsm->lsm_entry_count = entry_count;
        lsm->lsm_magic = le32_to_cpu(lcm->lcm_magic);
        lsm->lsm_layout_gen = le32_to_cpu(lcm->lcm_layout_gen);
        lsm->lsm_entry_count = entry_count;
+       lsm->lsm_mirror_count = le16_to_cpu(lcm->lcm_mirror_count);
        lsm->lsm_flags = le16_to_cpu(lcm->lcm_flags);
        lsm->lsm_is_released = true;
        lsm->lsm_maxbytes = LLONG_MIN;
        lsm->lsm_flags = le16_to_cpu(lcm->lcm_flags);
        lsm->lsm_is_released = true;
        lsm->lsm_maxbytes = LLONG_MIN;
index 6142e7c..458049c 100644 (file)
@@ -82,7 +82,8 @@ struct lov_stripe_md {
        u32             lsm_layout_gen;
        u16             lsm_flags;
        bool            lsm_is_released;
        u32             lsm_layout_gen;
        u16             lsm_flags;
        bool            lsm_is_released;
-       u32             lsm_entry_count;
+       u16             lsm_mirror_count;
+       u16             lsm_entry_count;
        struct lov_stripe_md_entry *lsm_entries[];
 };
 
        struct lov_stripe_md_entry *lsm_entries[];
 };
 
index 3796522..efcb442 100644 (file)
@@ -207,6 +207,7 @@ ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
        lcmv1->lcm_size = cpu_to_le32(lmm_size);
        lcmv1->lcm_layout_gen = cpu_to_le32(lsm->lsm_layout_gen);
        lcmv1->lcm_flags = cpu_to_le16(lsm->lsm_flags);
        lcmv1->lcm_size = cpu_to_le32(lmm_size);
        lcmv1->lcm_layout_gen = cpu_to_le32(lsm->lsm_layout_gen);
        lcmv1->lcm_flags = cpu_to_le16(lsm->lsm_flags);
+       lcmv1->lcm_mirror_count = cpu_to_le16(lsm->lsm_mirror_count);
        lcmv1->lcm_entry_count = cpu_to_le16(lsm->lsm_entry_count);
 
        offset = sizeof(*lcmv1) + sizeof(*lcme) * lsm->lsm_entry_count;
        lcmv1->lcm_entry_count = cpu_to_le16(lsm->lsm_entry_count);
 
        offset = sizeof(*lcmv1) + sizeof(*lcme) * lsm->lsm_entry_count;
index b38c91c..e48de25 100644 (file)
@@ -440,8 +440,7 @@ static void mdc_intent_close_pack(struct ptlrpc_request *req,
        struct ldlm_lock        *lock;
        enum mds_op_bias         bias = op_data->op_bias;
 
        struct ldlm_lock        *lock;
        enum mds_op_bias         bias = op_data->op_bias;
 
-       if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
-                     MDS_RENAME_MIGRATE)))
+       if (!(bias & (MDS_CLOSE_INTENT | MDS_RENAME_MIGRATE)))
                return;
 
        data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
                return;
 
        data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
index 1641161..d0dfe2d 100644 (file)
@@ -776,7 +776,8 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
                        /* save the errcode and proceed to close */
                        saved_rc = rc;
                }
                        /* save the errcode and proceed to close */
                        saved_rc = rc;
                }
-       } else if (op_data->op_bias & MDS_CLOSE_LAYOUT_SWAP) {
+       } else if (op_data->op_bias & (MDS_CLOSE_LAYOUT_SWAP |
+                                      MDS_CLOSE_LAYOUT_MERGE)) {
                req_fmt = &RQF_MDS_INTENT_CLOSE;
        } else {
                req_fmt = &RQF_MDS_CLOSE;
                req_fmt = &RQF_MDS_INTENT_CLOSE;
        } else {
                req_fmt = &RQF_MDS_CLOSE;
index 0fa360d..d16f363 100644 (file)
@@ -1071,9 +1071,126 @@ free:
        return rc;
 }
 
        return rc;
 }
 
+static int mdd_declare_xattr_del(const struct lu_env *env,
+                                struct mdd_device *mdd,
+                                struct mdd_object *obj,
+                                const char *name,
+                                struct thandle *handle);
+
 static int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
                         const char *name);
 
 static int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
                         const char *name);
 
+/**
+ * Merge the layout of a victim object into \a md_obj.
+ *
+ * Both objects are write-locked in an order decided by lu_fid_cmp()
+ * (presumably so that concurrent callers agree on the lock order).  The
+ * victim's LOV EA is then passed to the target with LU_XATTR_MERGE and the
+ * victim's own LOV EA is deleted.  If that deletion fails, the LOV EA saved
+ * from the target beforehand is written back with LU_XATTR_REPLACE as a
+ * best-effort rollback.
+ *
+ * \param[in] env      execution environment
+ * \param[in] md_obj   target object that receives the victim's layout
+ * \param[in] md_vic   victim object whose layout is taken away
+ *
+ * \retval 0           success, or the victim has no LOV EA (-ENODATA is
+ *                     mapped to 0 and nothing is merged)
+ * \retval -EPERM      target and victim have the same FID
+ * \retval -EINVAL     victim layout magic is not LOV_MAGIC_COMP_V1
+ * \retval negative    other errno from the transaction or xattr operations
+ */
+static int mdd_xattr_merge(const struct lu_env *env, struct md_object *md_obj,
+                          struct md_object *md_vic)
+{
+       struct mdd_device *mdd = mdo2mdd(md_obj);
+       struct mdd_object *obj = md2mdd_obj(md_obj);
+       struct mdd_object *vic = md2mdd_obj(md_vic);
+       struct lu_buf *buf = &mdd_env_info(env)->mti_buf[0];
+       struct lu_buf *buf_vic = &mdd_env_info(env)->mti_buf[1];
+       struct lov_mds_md *lmm;
+       struct thandle *handle;
+       int rc;
+       ENTRY;
+
+       rc = lu_fid_cmp(mdo2fid(obj), mdo2fid(vic));
+       if (rc == 0) /* same fid */
+               RETURN(-EPERM);
+
+       handle = mdd_trans_create(env, mdd);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       /* rc still holds the FID comparison result and picks the lock order */
+       if (rc > 0) {
+               mdd_write_lock(env, obj, MOR_TGT_CHILD);
+               mdd_write_lock(env, vic, MOR_TGT_CHILD);
+       } else {
+               mdd_write_lock(env, vic, MOR_TGT_CHILD);
+               mdd_write_lock(env, obj, MOR_TGT_CHILD);
+       }
+
+       /* Both buffers come from the per-env mdd info and may carry stale
+        * contents.  Clear them before the first GOTO(out), because the
+        * common exit path calls lu_buf_free() on each of them. */
+       memset(buf, 0, sizeof(*buf));
+       memset(buf_vic, 0, sizeof(*buf_vic));
+
+       /* get EA of victim file */
+       rc = mdd_get_lov_ea(env, vic, buf_vic);
+       if (rc < 0) {
+               /* NOTE(review): a victim without LOV EA is reported as
+                * success although nothing was merged - confirm intent. */
+               if (rc == -ENODATA)
+                       rc = 0;
+               GOTO(out, rc);
+       }
+
+       /* parse the layout of victim file */
+       lmm = buf_vic->lb_buf;
+       if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_COMP_V1)
+               GOTO(out, rc = -EINVAL);
+
+       /* save EA of target file for restore */
+       rc = mdd_get_lov_ea(env, obj, buf);
+       if (rc < 0)
+               GOTO(out, rc);
+
+       /* Get rid of the layout from victim object */
+       rc = mdd_declare_xattr_del(env, mdd, vic, XATTR_NAME_LOV, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = mdd_declare_xattr_set(env, mdd, obj, buf_vic, XATTR_LUSTRE_LOV,
+                                  LU_XATTR_MERGE, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = mdd_trans_start(env, mdd, handle);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       rc = mdo_xattr_set(env, obj, buf_vic, XATTR_LUSTRE_LOV, LU_XATTR_MERGE,
+                          handle);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = mdo_xattr_del(env, vic, XATTR_NAME_LOV, handle);
+       if (rc) {
+               /* The target already carries the merged layout but the
+                * victim could not be stripped: restore the saved LOV EA
+                * of the target and report the original error. */
+               int rc2;
+
+               rc2 = mdo_xattr_set(env, obj, buf, XATTR_NAME_LOV,
+                                   LU_XATTR_REPLACE, handle);
+               if (rc2)
+                       CERROR("%s: failed to rollback of layout of: "DFID
+                              ": %d, file state unknown\n",
+                              mdd_obj_dev_name(obj), PFID(mdo2fid(obj)), rc2);
+               GOTO(out, rc);
+       }
+
+       /* changelog failures are deliberately ignored */
+       (void)mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, obj, handle);
+       (void)mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, vic, handle);
+       EXIT;
+
+out:
+       mdd_trans_stop(env, mdd, rc, handle);
+       mdd_write_unlock(env, obj);
+       mdd_write_unlock(env, vic);
+       lu_buf_free(buf);
+       lu_buf_free(buf_vic);
+
+       return rc;
+}
+
+/**
+ * Check whether a layout may be merged into \a target.
+ *
+ * Only the type of \a target is inspected: merging into a directory is
+ * refused.  \a env and \a victim are not used by the check.
+ *
+ * \param[in] env      execution environment (unused)
+ * \param[in] target   object that would receive the merged layout
+ * \param[in] victim   object whose layout would be merged (unused)
+ *
+ * \retval 0           merge is allowed
+ * \retval -EISDIR     \a target is a directory
+ */
+static int mdd_layout_merge_allowed(const struct lu_env *env,
+                                   struct md_object *target,
+                                   struct md_object *victim)
+{
+       struct mdd_object *o1 = md2mdd_obj(target);
+       ENTRY;
+
+       /* cannot extend directory's LOVEA */
+       if (S_ISDIR(mdd_object_type(o1))) {
+               CERROR("%s: Don't extend directory's LOVEA, just set it.\n",
+                      mdd_obj_dev_name(o1));
+               RETURN(-EISDIR);
+       }
+
+       RETURN(0);
+}
+
 /**
  * The caller should guarantee to update the object ctime
  * after xattr_set if needed.
 /**
  * The caller should guarantee to update the object ctime
  * after xattr_set if needed.
@@ -1099,6 +1216,21 @@ static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
        if (rc)
                RETURN(rc);
 
        if (rc)
                RETURN(rc);
 
+       if (strcmp(name, XATTR_LUSTRE_LOV) == 0 && fl == LU_XATTR_MERGE) {
+               struct md_object *victim = buf->lb_buf;
+
+               if (buf->lb_len != sizeof(victim))
+                       RETURN(-EINVAL);
+
+               rc = mdd_layout_merge_allowed(env, obj, victim);
+               if (rc)
+                       RETURN(rc);
+
+               /* merge layout of victim as a mirror of obj's. */
+               rc = mdd_xattr_merge(env, obj, victim);
+               RETURN(rc);
+       }
+
        if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0 ||
            strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
                struct posix_acl *acl;
        if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0 ||
            strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
                struct posix_acl *acl;
index 3f3a7dc..344160b 100644 (file)
@@ -1058,7 +1058,10 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
                ma->ma_attr_flags |= MDS_CLOSE_LAYOUT_SWAP;
        else
                ma->ma_attr_flags &= ~MDS_CLOSE_LAYOUT_SWAP;
                ma->ma_attr_flags |= MDS_CLOSE_LAYOUT_SWAP;
        else
                ma->ma_attr_flags &= ~MDS_CLOSE_LAYOUT_SWAP;
-
+       if (rec->sa_bias & MDS_CLOSE_LAYOUT_MERGE)
+               ma->ma_attr_flags |= MDS_CLOSE_LAYOUT_MERGE;
+       else
+               ma->ma_attr_flags &= ~MDS_CLOSE_LAYOUT_MERGE;
        RETURN(0);
 }
 
        RETURN(0);
 }
 
@@ -1137,7 +1140,7 @@ static int mdt_intent_close_unpack(struct mdt_thread_info *info)
        struct req_capsule      *pill = info->mti_pill;
        ENTRY;
 
        struct req_capsule      *pill = info->mti_pill;
        ENTRY;
 
-       if (!(ma->ma_attr_flags & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)))
+       if (!(ma->ma_attr_flags & MDS_CLOSE_INTENT))
                RETURN(0);
 
        req_capsule_extend(pill, &RQF_MDS_INTENT_CLOSE);
                RETURN(0);
 
        req_capsule_extend(pill, &RQF_MDS_INTENT_CLOSE);
index 2b6ee7d..197b852 100644 (file)
@@ -1911,8 +1911,8 @@ out_reprocess:
        return rc;
 }
 
        return rc;
 }
 
-int mdt_close_swap_layouts(struct mdt_thread_info *info,
-                          struct mdt_object *o, struct md_attr *ma)
+int mdt_close_handle_layouts(struct mdt_thread_info *info,
+                            struct mdt_object *o, struct md_attr *ma)
 {
        struct mdt_lock_handle  *lh1 = &info->mti_lh[MDT_LH_NEW];
        struct mdt_lock_handle  *lh2 = &info->mti_lh[MDT_LH_OLD];
 {
        struct mdt_lock_handle  *lh1 = &info->mti_lh[MDT_LH_NEW];
        struct mdt_lock_handle  *lh2 = &info->mti_lh[MDT_LH_OLD];
@@ -2005,8 +2005,17 @@ int mdt_close_swap_layouts(struct mdt_thread_info *info,
                GOTO(out_unlock1, rc);
 
        /* Swap layout with orphan object */
                GOTO(out_unlock1, rc);
 
        /* Swap layout with orphan object */
-       rc = mo_swap_layouts(info->mti_env, mdt_object_child(o1),
-                            mdt_object_child(o2), 0);
+       if (ma->ma_attr_flags & MDS_CLOSE_LAYOUT_SWAP) {
+               rc = mo_swap_layouts(info->mti_env, mdt_object_child(o1),
+                                    mdt_object_child(o2), 0);
+       } else if (ma->ma_attr_flags & MDS_CLOSE_LAYOUT_MERGE) {
+               struct lu_buf *buf = &info->mti_buf;
+
+               buf->lb_len = sizeof(void *);
+               buf->lb_buf = mdt_object_child(o == o1 ? o2 : o1);
+               rc = mo_xattr_set(info->mti_env, mdt_object_child(o), buf,
+                                 XATTR_LUSTRE_LOV, LU_XATTR_MERGE);
+       }
        if (rc < 0)
                GOTO(out_unlock2, rc);
 
        if (rc < 0)
                GOTO(out_unlock2, rc);
 
@@ -2060,11 +2069,14 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd)
         struct md_attr *ma = &info->mti_attr;
         int rc = 0;
        __u64 mode;
         struct md_attr *ma = &info->mti_attr;
         int rc = 0;
        __u64 mode;
+       __u64 intent;
         ENTRY;
 
         mode = mfd->mfd_mode;
 
         ENTRY;
 
         mode = mfd->mfd_mode;
 
-       if (ma->ma_attr_flags & MDS_HSM_RELEASE) {
+       intent = ma->ma_attr_flags & MDS_CLOSE_INTENT;
+       switch (intent) {
+       case MDS_HSM_RELEASE: {
                rc = mdt_hsm_release(info, o, ma);
                if (rc < 0) {
                        CDEBUG(D_HSM, "%s: File " DFID " release failed: %d\n",
                rc = mdt_hsm_release(info, o, ma);
                if (rc < 0) {
                        CDEBUG(D_HSM, "%s: File " DFID " release failed: %d\n",
@@ -2072,10 +2084,11 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd)
                               PFID(mdt_object_fid(o)), rc);
                        /* continue to close even error occurred. */
                }
                               PFID(mdt_object_fid(o)), rc);
                        /* continue to close even error occurred. */
                }
+               break;
        }
        }
-
-       if (ma->ma_attr_flags & MDS_CLOSE_LAYOUT_SWAP) {
-               rc = mdt_close_swap_layouts(info, o, ma);
+       case MDS_CLOSE_LAYOUT_MERGE:
+       case MDS_CLOSE_LAYOUT_SWAP: {
+               rc = mdt_close_handle_layouts(info, o, ma);
                if (rc < 0) {
                        CDEBUG(D_INODE,
                               "%s: cannot swap layout of "DFID": rc=%d\n",
                if (rc < 0) {
                        CDEBUG(D_INODE,
                               "%s: cannot swap layout of "DFID": rc=%d\n",
@@ -2083,6 +2096,11 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd)
                               PFID(mdt_object_fid(o)), rc);
                        /* continue to close even if error occurred. */
                }
                               PFID(mdt_object_fid(o)), rc);
                        /* continue to close even if error occurred. */
                }
+               break;
+       }
+       default:
+               /* nothing */
+               break;
        }
 
        if (mode & FMODE_WRITE)
        }
 
        if (mode & FMODE_WRITE)
index 74262a5..7c80969 100644 (file)
@@ -2185,6 +2185,7 @@ void lustre_print_user_md(unsigned int lvl, struct lov_user_md *lum,
        CDEBUG(lvl, "\tlcm_layout_gen: %#x\n", comp_v1->lcm_layout_gen);
        CDEBUG(lvl, "\tlcm_flags: %#x\n", comp_v1->lcm_flags);
        CDEBUG(lvl, "\tlcm_entry_count: %#x\n\n", comp_v1->lcm_entry_count);
        CDEBUG(lvl, "\tlcm_layout_gen: %#x\n", comp_v1->lcm_layout_gen);
        CDEBUG(lvl, "\tlcm_flags: %#x\n", comp_v1->lcm_flags);
        CDEBUG(lvl, "\tlcm_entry_count: %#x\n\n", comp_v1->lcm_entry_count);
+       CDEBUG(lvl, "\tlcm_mirror_count: %#x\n\n", comp_v1->lcm_mirror_count);
 
        for (i = 0; i < comp_v1->lcm_entry_count; i++) {
                struct lov_comp_md_entry_v1 *ent = &comp_v1->lcm_entries[i];
 
        for (i = 0; i < comp_v1->lcm_entry_count; i++) {
                struct lov_comp_md_entry_v1 *ent = &comp_v1->lcm_entries[i];
@@ -2266,6 +2267,7 @@ void lustre_swab_lov_comp_md_v1(struct lov_comp_md_v1 *lum)
        __swab32s(&lum->lcm_layout_gen);
        __swab16s(&lum->lcm_flags);
        __swab16s(&lum->lcm_entry_count);
        __swab32s(&lum->lcm_layout_gen);
        __swab16s(&lum->lcm_flags);
        __swab16s(&lum->lcm_entry_count);
+       __swab16s(&lum->lcm_mirror_count);
        CLASSERT(offsetof(typeof(*lum), lcm_padding1) != 0);
        CLASSERT(offsetof(typeof(*lum), lcm_padding2) != 0);
 
        CLASSERT(offsetof(typeof(*lum), lcm_padding1) != 0);
        CLASSERT(offsetof(typeof(*lum), lcm_padding2) != 0);
 
index 9240f37..cdb23de 100644 (file)
@@ -1705,6 +1705,8 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
        LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
                (unsigned)LCME_FL_INIT);
                 (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
        LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
                (unsigned)LCME_FL_INIT);
+       LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n",
+               (unsigned)LCME_FL_NEG);
 
        /* Checks for struct lov_comp_md_v1 */
        LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n",
 
        /* Checks for struct lov_comp_md_v1 */
        LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n",
@@ -1729,9 +1731,13 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count));
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count));
-       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 16, "found %lld\n",
+       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_mirror_count) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_v1, lcm_mirror_count));
+       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count));
+       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 18, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1));
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1));
-       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 8, "found %lld\n",
+       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 6, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1));
        LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2));
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1));
        LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2));
@@ -1742,6 +1748,14 @@ void lustre_assert_wire_constants(void)
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]));
        CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]));
        CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0));
+       LASSERTF(LCM_FL_NOT_FLR == 0, "found %lld\n",
+                (long long)LCM_FL_NOT_FLR);
+       LASSERTF(LCM_FL_RDONLY == 1, "found %lld\n",
+                (long long)LCM_FL_RDONLY);
+       LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n",
+                (long long)LCM_FL_WRITE_PENDING);
+       LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n",
+                (long long)LCM_FL_SYNC_PENDING);
 
        /* Checks for struct lmv_mds_md_v1 */
        LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
 
        /* Checks for struct lmv_mds_md_v1 */
        LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
index 68dabfb..9832547 100644 (file)
@@ -38,7 +38,7 @@ noinst_SCRIPTS += setup-cifs.sh parallel-scale-cifs.sh
 noinst_SCRIPTS += posix.sh sanity-scrub.sh scrub-performance.sh ha.sh
 noinst_SCRIPTS += sanity-lfsck.sh lfsck-performance.sh
 noinst_SCRIPTS += resolveip
 noinst_SCRIPTS += posix.sh sanity-scrub.sh scrub-performance.sh ha.sh
 noinst_SCRIPTS += sanity-lfsck.sh lfsck-performance.sh
 noinst_SCRIPTS += resolveip
-noinst_SCRIPTS += sanity-hsm.sh sanity-lsnapshot.sh sanity-pfl.sh
+noinst_SCRIPTS += sanity-hsm.sh sanity-lsnapshot.sh sanity-pfl.sh sanity-flr.sh
 noinst_SCRIPTS += sanity-dom.sh dom-performance.sh
 nobase_noinst_SCRIPTS = cfg/local.sh
 nobase_noinst_SCRIPTS += test-groups/regression test-groups/regression-mpi
 noinst_SCRIPTS += sanity-dom.sh dom-performance.sh
 nobase_noinst_SCRIPTS = cfg/local.sh
 nobase_noinst_SCRIPTS += test-groups/regression test-groups/regression-mpi
diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh
new file mode 100644 (file)
index 0000000..94767b9
--- /dev/null
@@ -0,0 +1,157 @@
+#!/bin/bash
+#
+# Run select tests by setting ONLY, or as arguments to the script.
+# Skip specific tests by setting EXCEPT.
+#
+# Run test by setting NOSETUP=true when ltest has setup env for us
+set -e
+set +o posix
+
+# make the test directory and the sibling utils directory reachable via PATH
+SRCDIR=$(dirname $0)
+export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin
+
+ONLY=${ONLY:-"$*"}
+# Bug number for skipped test:
+ALWAYS_EXCEPT="$SANITY_FLR_EXCEPT"
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+
+[ "$ALWAYS_EXCEPT$EXCEPT" ] &&
+       echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
+
+# tool locations, each overridable from the environment
+TMP=${TMP:-/tmp}
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
+LFS=${LFS:-lfs}
+LCTL=${LCTL:-lctl}
+MULTIOP=${MULTIOP:-multiop}
+
+# pull in the shared test framework and the cluster configuration
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
+
+check_and_setup_lustre
+DIR=${DIR:-$MOUNT}
+assert_DIR
+
+# skip the whole script when the MDS is older than 2.7.64
+if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ]]; then
+       skip_env "Need MDS version at least 2.7.64" && exit
+fi
+
+build_test_filter
+
+# refuse to run when both the invoking user and RUNAS_ID are root
+[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
+       error "\$RUNAS_ID set to 0, but \$UID is also 0!"
+check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
+# Global array holding the distinct mirror IDs found by get_mirror_ids().
+declare -a mirror_array
+
+# Collect the mirror IDs of file $1 into mirror_array and print how many
+# distinct mirrors there are.  The mirror ID is taken from the upper bits
+# (>> 16) of every lcme_id reported by "lfs getstripe".
+get_mirror_ids() {
+       local tf=$1
+       local comp_id
+       local ids=()
+
+       for comp_id in $($LFS getstripe $tf | awk '/lcme_id/{print $2}'); do
+               ids+=($((comp_id >> 16)))
+       done
+
+       # de-duplicate: several components may belong to the same mirror
+       mirror_array=($(printf "%s\n" "${ids[@]}" | sort -u))
+
+       echo ${#mirror_array[@]}
+}
+
+# command line test cases
+
+# test_1: build a file with the maximum number of mirrors, each created with
+# its own stripe count, check that one more mirror is refused, then verify
+# the stripe count and extent of every component.
+test_1() {
+       local tf=$DIR/$tfile
+       local mirror_count=16 # LUSTRE_MIRROR_COUNT_MAX
+       local i
+
+       $LFS setstripe -E EOF -c -1 $tf ||
+               error "failed to create $tf"
+
+       # the first mirror was created with -c -1, i.e. all OSTs
+       local stripes[0]=$OSTCOUNT
+
+       for ((i = 1; i < $mirror_count; i++)); do
+               # add mirrors with different stripes to the file
+               stripes[$i]=$((RANDOM % OSTCOUNT))
+               [ ${stripes[$i]} -eq 0 ] && stripes[$i]=1
+
+               $LFS setstripe --component-add --mirror -c ${stripes[$i]} $tf ||
+                       error "failed to add mirror $i"
+       done
+
+       [ $(get_mirror_ids $tf) -ne $mirror_count ] &&
+               error "mirror count error"
+
+       # can't create mirrors exceeding LUSTRE_MIRROR_COUNT_MAX
+       $LFS setstripe --component-add --mirror $tf &&
+               error "Creating the $((mirror_count+1))th mirror succeeded"
+
+       local ids=($($LFS getstripe $tf | awk '/lcme_id/{print $2}' |
+                       tr '\n' ' '))
+
+       # verify the range of components and stripe counts
+       for ((i = 0; i < $mirror_count; i++)); do
+               local sc=$($LFS getstripe -I${ids[$i]} -c $tf)
+               local start=$($LFS getstripe -I${ids[$i]} --component-start $tf)
+               local end=$($LFS getstripe -I${ids[$i]} --component-end $tf)
+
+               [[ ${stripes[$i]} = $sc ]] || {
+                       $LFS getstripe -v $tf;
+                       error "$i: sc error: id: ${ids[$i]}, ${stripes[$i]}";
+               }
+               [ "$start" -eq 0 ] || {
+                       $LFS getstripe -v $tf;
+                       error "$i: start error id: ${ids[$i]}";
+               }
+               [ "$end" = "EOF" ] || {
+                       $LFS getstripe -v $tf;
+                       error "$i: end error id: ${ids[$i]}";
+               }
+       done
+}
+run_test 1 "create components with setstripe options"
+
+# test_2: turn the layout of an existing file ($tf2) into a mirror of $tf and
+# check that $tf ends up with two mirrors while $tf2 loses its layout.
+test_2() {
+       local tf=$DIR/$tfile
+       local tf2=$DIR/$tfile-2
+
+       $LFS setstripe -E 1M -E EOF -c 1 $tf
+       $LFS setstripe -E 2M -E EOF -c -1 $tf2
+
+       $LFS setstripe --component-add --mirror=$tf2 $tf ||
+               error "failed to merge $tf2 into $tf"
+
+       [ $(get_mirror_ids $tf) -ne 2 ] && error "mirror count should be 2"
+       $LFS getstripe $tf2 | grep -q 'no stripe info' ||
+               error "$tf2 still has stripe info"
+}
+run_test 2 "create components from existing files"
+
+# test_3: merge a file located on MDT1 into a file located on MDT0 and check
+# that the result has two components.
+test_3() {
+       [[ $MDSCOUNT -lt 2 ]] && skip "need >= 2 MDTs" && return
+
+       local i
+
+       for ((i = 0; i < 2; i++)); do
+               $LFS mkdir -i $i $DIR/$tdir-$i ||
+                       error "failed to create $DIR/$tdir-$i on MDT$i"
+               $LFS setstripe -E -1 $DIR/$tdir-$i/$tfile ||
+                       error "failed to create $DIR/$tdir-$i/$tfile"
+       done
+
+       $LFS setstripe --component-add --mirror=$DIR/$tdir-1/$tfile \
+               $DIR/$tdir-0/$tfile || error "creating mirrors"
+
+       # mdt doesn't support to cancel layout lock for remote objects, do
+       # it here manually.
+       cancel_lru_locks mdc
+
+       # make sure the mirrored file was created successfully
+       [[ $($LFS getstripe --component-count $DIR/$tdir-0/$tfile) -eq 2 ]] ||
+               { $LFS getstripe $DIR/$tdir-0/$tfile;
+                       error "expected 2 components"; }
+
+       # cleanup
+       rm -rf $DIR/$tdir-*
+}
+run_test 3 "create components from files located on different MDTs"
+
+complete $SECONDS
+check_and_cleanup_lustre
+exit_status
index 026e36e..32c403b 100644 (file)
@@ -166,7 +166,6 @@ static int lfs_list_commands(int argc, char **argv);
        "\tmode: the mode of the directory\n"
 
 static const char      *progname;
        "\tmode: the mode of the directory\n"
 
 static const char      *progname;
-static bool             file_lease_supported = true;
 
 /* all available commands */
 command_t cmdlist[] = {
 
 /* all available commands */
 command_t cmdlist[] = {
@@ -411,8 +410,6 @@ command_t cmdlist[] = {
 };
 
 
 };
 
 
-#define MIGRATION_NONBLOCK     1
-
 static int check_hashtype(const char *hashtype)
 {
        int i;
 static int check_hashtype(const char *hashtype)
 {
        int i;
@@ -424,47 +421,148 @@ static int check_hashtype(const char *hashtype)
        return 0;
 }
 
        return 0;
 }
 
-/**
- * Internal helper for migrate_copy_data(). Check lease and report error if
- * need be.
- *
- * \param[in]  fd           File descriptor on which to check the lease.
- * \param[out] lease_broken Set to true if the lease was broken.
- * \param[in]  group_locked Whether a group lock was taken or not.
- * \param[in]  path         Name of the file being processed, for error
- *                         reporting
- *
- * \retval 0       Migration can keep on going.
- * \retval -errno  Error occurred, abort migration.
- */
-static int check_lease(int fd, bool *lease_broken, bool group_locked,
-                      const char *path)
+
+/* Names the step that failed, for error reporting.  migrate_open_files()
+ * stores a description here on failure and resets it to NULL on success. */
+static const char *error_loc = "syserror";
+
+/* Migration flags; each value occupies its own bit.
+ * NOTE(review): the consumers are outside this hunk - confirm how
+ * MIGRATION_MIRROR is interpreted. */
+enum {
+       MIGRATION_NONBLOCK      = 1 << 0,
+       MIGRATION_MIRROR        = 1 << 1,
+};
+
+static int lfs_component_create(char *fname, int open_flags, mode_t open_mode,
+                               struct llapi_layout *layout);
+
+/**
+ * Open the source file and create a volatile file in the same directory.
+ *
+ * The source is opened O_RDWR | O_DIRECT.  A volatile file with a random
+ * name is then created in the parent directory of \a name, striped from
+ * \a param when it is given and from \a layout otherwise, and is given the
+ * same owner and group as the source.  On failure the global error_loc
+ * names the failing step; on success it is reset to NULL.
+ *
+ * \param[in]  name    path of the source file
+ * \param[in]  param   stripe parameters for the volatile file, or NULL
+ * \param[in]  layout  layout for the volatile file, used if \a param is NULL
+ * \param[out] fd_src  descriptor of the source file, set on success only
+ * \param[out] fd_tgt  descriptor of the volatile file, set on success only
+ *
+ * \retval 0           success
+ * \retval -EINVAL     neither \a param nor \a layout was given
+ * \retval -ERANGE     \a name does not fit into PATH_MAX
+ * \retval -errno      other failure; any descriptor opened here is closed
+ */
+static int
+migrate_open_files(const char *name, const struct llapi_stripe_param *param,
+                  struct llapi_layout *layout, int *fd_src, int *fd_tgt)
 {
 {
-       int rc;
+       int                      fd = -1;
+       int                      fdv = -1;
+       int                      mdt_index;
+       int                      random_value;
+       char                     parent[PATH_MAX];
+       char                     volatile_file[PATH_MAX];
+       char                    *ptr;
+       int                      rc;
+       struct stat              st;
+       struct stat              stv;
 
 
-       if (!file_lease_supported)
-               return 0;
+       if (param == NULL && layout == NULL) {
+               error_loc = "layout information";
+               return -EINVAL;
+       }
 
 
-       rc = llapi_lease_check(fd);
-       if (rc > 0)
-               return 0; /* llapi_check_lease returns > 0 on success. */
+       /* search for file directory pathname */
+       if (strlen(name) > sizeof(parent) - 1) {
+               error_loc = "source file name";
+               return -ERANGE;
+       }
 
 
-       if (!group_locked) {
-               fprintf(stderr, "%s: cannot migrate '%s': file busy\n",
-                       progname, path);
-               rc = rc ? rc : -EAGAIN;
+       /* the length check above guarantees the copy is NUL-terminated */
+       strncpy(parent, name, sizeof(parent));
+       ptr = strrchr(parent, '/');
+       if (ptr == NULL) {
+               if (getcwd(parent, sizeof(parent)) == NULL) {
+                       error_loc = "getcwd";
+                       return -errno;
+               }
        } else {
        } else {
-               fprintf(stderr, "%s: external attempt to access file '%s' "
-                       "blocked until migration ends.\n", progname, path);
-               rc = 0;
+               if (ptr == parent) /* leading '/' */
+                       ptr = parent + 1;
+               *ptr = '\0';
+       }
+
+       /* open file, direct io */
+       /* even if the file is only read, WR mode is needed to allow
+        * layout swap on fd */
+       fd = open(name, O_RDWR | O_DIRECT);
+       if (fd < 0) {
+               rc = -errno;
+               error_loc = "cannot open source file";
+               return rc;
+       }
+
+       rc = llapi_file_fget_mdtidx(fd, &mdt_index);
+       if (rc < 0) {
+               error_loc = "cannot get MDT index";
+               goto out;
+       }
+
+       /* retry with a new random name as long as the name already exists */
+       do {
+               int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW;
+               mode_t open_mode = S_IRUSR | S_IWUSR;
+
+               random_value = random();
+               rc = snprintf(volatile_file, sizeof(volatile_file),
+                             "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
+                             mdt_index, random_value);
+               if (rc >= sizeof(volatile_file)) {
+                       rc = -ENAMETOOLONG;
+                       break;
+               }
+
+               /* create, open a volatile file, use caching (ie no directio) */
+               if (param != NULL)
+                       fdv = llapi_file_open_param(volatile_file, open_flags,
+                                                   open_mode, param);
+               else
+                       fdv = lfs_component_create(volatile_file, open_flags,
+                                                  open_mode, layout);
+       } while (fdv < 0 && (rc = fdv) == -EEXIST);
+
+       if (rc < 0) {
+               error_loc = "cannot create volatile file";
+               goto out;
+       }
+
+       /* In case the MDT does not support creation of volatile files
+        * we should try to unlink it. */
+       (void)unlink(volatile_file);
+
+       /* Not-owner (root?) special case.
+        * Need to set owner/group of volatile file like original.
+        * This will allow to pass related check during layout_swap.
+        */
+       rc = fstat(fd, &st);
+       if (rc != 0) {
+               rc = -errno;
+               error_loc = "cannot stat source file";
+               goto out;
+       }
+
+       rc = fstat(fdv, &stv);
+       if (rc != 0) {
+               rc = -errno;
+               error_loc = "cannot stat volatile";
+               goto out;
+       }
+
+       if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
+               rc = fchown(fdv, st.st_uid, st.st_gid);
+               if (rc != 0) {
+                       rc = -errno;
+                       error_loc = "cannot change ownwership of volatile";
+                       goto out;
+               }
+       }
+
+out:
+       if (rc < 0) {
+               /* 0 is a valid descriptor; only negative means "not open" */
+               if (fd >= 0)
+                       close(fd);
+               if (fdv >= 0)
+                       close(fdv);
+       } else {
+               *fd_src = fd;
+               *fd_tgt = fdv;
+               error_loc = NULL;
        }
        }
-       *lease_broken = true;
        return rc;
 }
 
        return rc;
 }
 
-static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size,
-                            bool group_locked, const char *fname)
+static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int))
 {
 {
+       struct llapi_layout *layout;
+       size_t   buf_size = 4 * 1024 * 1024;
        void    *buf = NULL;
        ssize_t  rsize = -1;
        ssize_t  wsize = 0;
        void    *buf = NULL;
        ssize_t  rsize = -1;
        ssize_t  wsize = 0;
@@ -472,7 +570,17 @@ static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size,
        size_t   wpos = 0;
        off_t    bufoff = 0;
        int      rc;
        size_t   wpos = 0;
        off_t    bufoff = 0;
        int      rc;
-       bool     lease_broken = false;
+
+       layout = llapi_layout_get_by_fd(fd_src, 0);
+       if (layout != NULL) {
+               uint64_t stripe_size;
+
+               rc = llapi_layout_stripe_size_get(layout, &stripe_size);
+               if (rc == 0)
+                       buf_size = stripe_size;
+
+               llapi_layout_free(layout);
+       }
 
        /* Use a page-aligned buffer for direct I/O */
        rc = posix_memalign(&buf, getpagesize(), buf_size);
 
        /* Use a page-aligned buffer for direct I/O */
        rc = posix_memalign(&buf, getpagesize(), buf_size);
@@ -483,18 +591,16 @@ static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size,
                /* read new data only if we have written all
                 * previously read data */
                if (wpos == rpos) {
                /* read new data only if we have written all
                 * previously read data */
                if (wpos == rpos) {
-                       if (!lease_broken) {
-                               rc = check_lease(fd_src, &lease_broken,
-                                                group_locked, fname);
+                       if (check_file) {
+                               rc = check_file(fd_src);
                                if (rc < 0)
                                if (rc < 0)
-                                       goto out;
+                                       break;
                        }
                        }
+
                        rsize = read(fd_src, buf, buf_size);
                        if (rsize < 0) {
                                rc = -errno;
                        rsize = read(fd_src, buf, buf_size);
                        if (rsize < 0) {
                                rc = -errno;
-                               fprintf(stderr, "%s: %s: read failed: %s\n",
-                                       progname, fname, strerror(-rc));
-                               goto out;
+                               break;
                        }
                        rpos += rsize;
                        bufoff = 0;
                        }
                        rpos += rsize;
                        bufoff = 0;
@@ -506,39 +612,39 @@ static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size,
                wsize = write(fd_dst, buf + bufoff, rpos - wpos);
                if (wsize < 0) {
                        rc = -errno;
                wsize = write(fd_dst, buf + bufoff, rpos - wpos);
                if (wsize < 0) {
                        rc = -errno;
-                       fprintf(stderr,
-                               "%s: %s: write failed on volatile: %s\n",
-                               progname, fname, strerror(-rc));
-                       goto out;
+                       break;
                }
                wpos += wsize;
                bufoff += wsize;
        }
 
                }
                wpos += wsize;
                bufoff += wsize;
        }
 
-       rc = fsync(fd_dst);
-       if (rc < 0) {
-               rc = -errno;
-               fprintf(stderr, "%s: %s: fsync failed: %s\n",
-                       progname, fname, strerror(-rc));
+       if (rc == 0) {
+               rc = fsync(fd_dst);
+               if (rc < 0)
+                       rc = -errno;
        }
 
        }
 
-out:
        free(buf);
        return rc;
 }
 
        free(buf);
        return rc;
 }
 
-static int migrate_copy_timestamps(int fdv, const struct stat *st)
+static int migrate_copy_timestamps(int fd, int fdv)
 {
 {
-       struct timeval  tv[2] = {
-               {.tv_sec = st->st_atime},
-               {.tv_sec = st->st_mtime}
-       };
+       struct stat st;
+
+       if (fstat(fd, &st) == 0) {
+               struct timeval tv[2] = {
+                       {.tv_sec = st.st_atime},
+                       {.tv_sec = st.st_mtime}
+               };
 
 
-       return futimes(fdv, tv);
+               return futimes(fdv, tv);
+       }
+
+       return -errno;
 }
 
 }
 
-static int migrate_block(int fd, int fdv, const struct stat *st,
-                        size_t buf_size, const char *name)
+static int migrate_block(int fd, int fdv)
 {
        __u64   dv1;
        int     gid;
 {
        __u64   dv1;
        int     gid;
@@ -547,8 +653,7 @@ static int migrate_block(int fd, int fdv, const struct stat *st,
 
        rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
        if (rc < 0) {
 
        rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get dataversion: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get dataversion";
                return rc;
        }
 
                return rc;
        }
 
@@ -561,22 +666,20 @@ static int migrate_block(int fd, int fdv, const struct stat *st,
         * block it too. */
        rc = llapi_group_lock(fd, gid);
        if (rc < 0) {
         * block it too. */
        rc = llapi_group_lock(fd, gid);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get group lock: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get group lock";
                return rc;
        }
 
                return rc;
        }
 
-       rc = migrate_copy_data(fd, fdv, buf_size, true, name);
+       rc = migrate_copy_data(fd, fdv, NULL);
        if (rc < 0) {
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
+               error_loc = "data copy failed";
                goto out_unlock;
        }
 
        /* Make sure we keep original atime/mtime values */
                goto out_unlock;
        }
 
        /* Make sure we keep original atime/mtime values */
-       rc = migrate_copy_timestamps(fdv, st);
+       rc = migrate_copy_timestamps(fd, fdv);
        if (rc < 0) {
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: timestamp copy failed\n",
-                       progname, name);
+               error_loc = "timestamp copy failed";
                goto out_unlock;
        }
 
                goto out_unlock;
        }
 
@@ -588,28 +691,44 @@ static int migrate_block(int fd, int fdv, const struct stat *st,
        rc = llapi_fswap_layouts_grouplock(fd, fdv, dv1, 0, 0,
                                           SWAP_LAYOUTS_CHECK_DV1);
        if (rc == -EAGAIN) {
        rc = llapi_fswap_layouts_grouplock(fd, fdv, dv1, 0, 0,
                                           SWAP_LAYOUTS_CHECK_DV1);
        if (rc == -EAGAIN) {
-               fprintf(stderr, "%s: %s: dataversion changed during copy, "
-                       "migration aborted\n", progname, name);
+               error_loc = "file changed";
                goto out_unlock;
        } else if (rc < 0) {
                goto out_unlock;
        } else if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot swap layouts: %s\n", progname,
-                       name, strerror(-rc));
+               error_loc = "cannot swap layout";
                goto out_unlock;
        }
 
 out_unlock:
        rc2 = llapi_group_unlock(fd, gid);
        if (rc2 < 0 && rc == 0) {
                goto out_unlock;
        }
 
 out_unlock:
        rc2 = llapi_group_unlock(fd, gid);
        if (rc2 < 0 && rc == 0) {
-               fprintf(stderr, "%s: %s: putting group lock failed: %s\n",
-                       progname, name, strerror(-rc2));
+               error_loc = "unlock group lock";
                rc = rc2;
        }
 
        return rc;
 }
 
                rc = rc2;
        }
 
        return rc;
 }
 
-static int migrate_nonblock(int fd, int fdv, const struct stat *st,
-                           size_t buf_size, const char *name)
+/**
+ * Internal helper for migrate_copy_data(). Check the lease on \a fd via
+ * llapi_lease_check(); no error message is printed here.
+ *
+ * \param[in]  fd           File descriptor on which to check the lease.
+ *
+ * \retval 0       llapi_lease_check() returned > 0, migration can keep going.
+ * \retval -EBUSY  llapi_lease_check() did not return > 0, abort migration.
+ */
+static int check_lease(int fd)
+{
+       int rc;
+
+       rc = llapi_lease_check(fd);
+       if (rc > 0)
+               return 0; /* llapi_lease_check returns > 0 on success. */
+
+       return -EBUSY;
+}
+
+static int migrate_nonblock(int fd, int fdv)
 {
        __u64   dv1;
        __u64   dv2;
 {
        __u64   dv1;
        __u64   dv2;
@@ -617,47 +736,32 @@ static int migrate_nonblock(int fd, int fdv, const struct stat *st,
 
        rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
        if (rc < 0) {
 
        rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get data version: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get data version";
                return rc;
        }
 
                return rc;
        }
 
-       rc = migrate_copy_data(fd, fdv, buf_size, false, name);
+       rc = migrate_copy_data(fd, fdv, check_lease);
        if (rc < 0) {
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
+               error_loc = "data copy failed";
                return rc;
        }
 
        rc = llapi_get_data_version(fd, &dv2, LL_DV_RD_FLUSH);
        if (rc != 0) {
                return rc;
        }
 
        rc = llapi_get_data_version(fd, &dv2, LL_DV_RD_FLUSH);
        if (rc != 0) {
-               fprintf(stderr, "%s: %s: cannot get data version: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get data version";
                return rc;
        }
 
        if (dv1 != dv2) {
                rc = -EAGAIN;
                return rc;
        }
 
        if (dv1 != dv2) {
                rc = -EAGAIN;
-               fprintf(stderr, "%s: %s: data version changed during "
-                               "migration\n",
-                       progname, name);
+               error_loc = "source file changed";
                return rc;
        }
 
        /* Make sure we keep original atime/mtime values */
                return rc;
        }
 
        /* Make sure we keep original atime/mtime values */
-       rc = migrate_copy_timestamps(fdv, st);
+       rc = migrate_copy_timestamps(fd, fdv);
        if (rc < 0) {
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: timestamp copy failed\n",
-                       progname, name);
-               return rc;
-       }
-
-       /* Atomically put lease, swap layouts and close.
-        * for a migration we need to check data version on file did
-        * not change. */
-       rc = llapi_fswap_layouts(fd, fdv, 0, 0, SWAP_LAYOUTS_CLOSE);
-       if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot swap layouts: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "timestamp copy failed";
                return rc;
        }
 
                return rc;
        }
 
@@ -735,190 +839,147 @@ static int lfs_migrate(char *name, __u64 migration_flags,
                       struct llapi_stripe_param *param,
                       struct llapi_layout *layout)
 {
                       struct llapi_stripe_param *param,
                       struct llapi_layout *layout)
 {
-       int                      fd = -1;
-       int                      fdv = -1;
-       char                     parent[PATH_MAX];
-       int                      mdt_index;
-       int                      random_value;
-       char                     volatile_file[sizeof(parent) +
-                                              LUSTRE_VOLATILE_HDR_LEN +
-                                              2 * sizeof(mdt_index) +
-                                              2 * sizeof(random_value) + 4];
-       char                    *ptr;
-       int                      rc;
-       struct lov_user_md      *lum = NULL;
-       int                      lum_size;
-       int                      buf_size = 1024 * 1024 * 4;
-       bool                     have_lease_rdlck = false;
-       struct stat              st;
-       struct stat              stv;
+       int fd = -1;
+       int fdv = -1;
+       int rc;
 
 
-       /* find the right size for the IO and allocate the buffer */
-       lum_size = lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
-       lum = malloc(lum_size);
-       if (lum == NULL) {
-               rc = -ENOMEM;
-               goto free;
-       }
+       rc = migrate_open_files(name, param, layout, &fd, &fdv);
+       if (rc < 0)
+               goto out;
 
 
-       rc = llapi_file_get_stripe(name, lum);
-       /* failure can happen for many reasons and some may be not real errors
-        * (eg: no stripe)
-        * in case of a real error, a later call will fail with better
-        * error management */
-       if (rc == 0) {
-               if ((lum->lmm_magic == LOV_USER_MAGIC_V1 ||
-                    lum->lmm_magic == LOV_USER_MAGIC_V3) &&
-                   lum->lmm_stripe_size != 0)
-                       buf_size = lum->lmm_stripe_size;
+       if (!(migration_flags & MIGRATION_NONBLOCK)) {
+               /* Blocking mode (forced if servers do not support file lease).
+                * It is also the default mode, since we cannot distinguish
+                * between a broken lease and a server that does not support
+                * atomic swap/close (LU-6785) */
+               rc = migrate_block(fd, fdv);
+               goto out;
        }
 
        }
 
-       /* open file, direct io */
-       /* even if the file is only read, WR mode is nedeed to allow
-        * layout swap on fd */
-       fd = open(name, O_RDWR | O_DIRECT);
-       if (fd == -1) {
-               rc = -errno;
-               fprintf(stderr, "%s: cannot open '%s': %s\n", progname, name,
-                       strerror(-rc));
-               goto free;
-       }
-
-       if (file_lease_supported) {
-               rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
-               if (rc == -EOPNOTSUPP) {
-                       /* Older servers do not support file lease.
-                        * Disable related checks. This opens race conditions
-                        * as explained in LU-4840 */
-                       file_lease_supported = false;
-               } else if (rc < 0) {
-                       fprintf(stderr, "%s: %s: cannot get open lease: %s\n",
-                               progname, name, strerror(-rc));
-                       goto error;
-               } else {
-                       have_lease_rdlck = true;
-               }
+       rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
+       if (rc < 0) {
+               error_loc = "cannot get lease";
+               goto out;
        }
 
        }
 
-       /* search for file directory pathname */
-       if (strlen(name) > sizeof(parent)-1) {
-               rc = -E2BIG;
-               goto error;
-       }
-       strncpy(parent, name, sizeof(parent));
-       ptr = strrchr(parent, '/');
-       if (ptr == NULL) {
-               if (getcwd(parent, sizeof(parent)) == NULL) {
-                       rc = -errno;
-                       goto error;
-               }
-       } else {
-               if (ptr == parent)
-                       strcpy(parent, "/");
-               else
-                       *ptr = '\0';
+       rc = migrate_nonblock(fd, fdv);
+       if (rc < 0) {
+               llapi_lease_put(fd);
+               goto out;
        }
 
        }
 
-       rc = llapi_file_fget_mdtidx(fd, &mdt_index);
+       /* Atomically put lease, swap layouts and close.
+        * for a migration we need to check data version on file did
+        * not change. */
+       rc = llapi_fswap_layouts(fd, fdv, 0, 0,
+                                migration_flags & MIGRATION_MIRROR ?
+                                MERGE_LAYOUTS_CLOSE : SWAP_LAYOUTS_CLOSE);
        if (rc < 0) {
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get MDT index: %s\n",
-                       progname, name, strerror(-rc));
-               goto error;
+               error_loc = "cannot swap layout";
+               goto out;
        }
 
        }
 
-       do {
-               int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW;
-               mode_t open_mode = S_IRUSR | S_IWUSR;
+out:
+       if (fd >= 0)
+               close(fd);
 
 
-               random_value = random();
-               rc = snprintf(volatile_file, sizeof(volatile_file),
-                             "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
-                             mdt_index, random_value);
-               if (rc >= sizeof(volatile_file)) {
-                       rc = -E2BIG;
-                       goto error;
-               }
+       if (fdv >= 0)
+               close(fdv);
 
 
-               /* create, open a volatile file, use caching (ie no directio) */
-               if (param != NULL)
-                       fdv = llapi_file_open_param(volatile_file, open_flags,
-                                                   open_mode, param);
-               else if (layout != NULL)
-                       fdv = lfs_component_create(volatile_file, open_flags,
-                                                  open_mode, layout);
-               else
-                       fdv = -EINVAL;
-       } while (fdv == -EEXIST);
+       if (rc < 0)
+               fprintf(stderr, "error: %s: %s: %s: %s\n",
+                       progname, name, error_loc, strerror(-rc));
+       return rc;
+}
 
 
-       if (fdv < 0) {
-               rc = fdv;
-               fprintf(stderr, "%s: %s: cannot create volatile file in"
-                               " directory: %s\n",
-                       progname, parent, strerror(-rc));
-               goto error;
+static int lfs_create_mirror(char *fname, struct llapi_layout *layout,
+                            const char *mirror_file)
+{
+       int fd = -1;
+       int fdv = -1;
+       struct stat stbuf;
+       struct stat stbuf_v;
+       __u64 dv;
+       int rc;
+
+       if (mirror_file == NULL)
+               return lfs_migrate(fname, MIGRATION_NONBLOCK | MIGRATION_MIRROR,
+                                  NULL, layout);
+
+       fd = open(fname, O_RDWR);
+       if (fd < 0) {
+               error_loc = "open source file";
+               rc = -errno;
+               goto out;
        }
 
        }
 
-       /* In case the MDT does not support creation of volatile files
-        * we should try to unlink it. */
-       (void)unlink(volatile_file);
+       /* Get rid of cached pages from clients */
+       rc = llapi_get_data_version(fd, &dv, LL_DV_WR_FLUSH);
+       if (rc < 0) {
+               error_loc = "cannot get data version";
+               return rc;
+       }
 
 
-       /* Not-owner (root?) special case.
-        * Need to set owner/group of volatile file like original.
-        * This will allow to pass related check during layout_swap.
-        */
-       rc = fstat(fd, &st);
-       if (rc != 0) {
+       fdv = open(mirror_file, O_WRONLY);
+       if (fdv < 0) {
+               error_loc = "open target file";
                rc = -errno;
                rc = -errno;
-               fprintf(stderr, "%s: %s: cannot stat: %s\n", progname, name,
-                       strerror(errno));
-               goto error;
+               goto out;
        }
        }
-       rc = fstat(fdv, &stv);
-       if (rc != 0) {
+
+       rc = llapi_get_data_version(fdv, &dv, LL_DV_WR_FLUSH);
+       if (rc < 0) {
+               error_loc = "cannot get data version";
+               return rc;
+       }
+
+       if (fstat(fd, &stbuf) || fstat(fdv, &stbuf_v)) {
+               error_loc = "stat source or target file";
                rc = -errno;
                rc = -errno;
-               fprintf(stderr, "%s: %s: cannot stat: %s\n", progname,
-                       volatile_file, strerror(errno));
-               goto error;
+               goto out;
        }
        }
-       if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
-               rc = fchown(fdv, st.st_uid, st.st_gid);
-               if (rc != 0) {
-                       rc = -errno;
-                       fprintf(stderr, "%s: %s: cannot chown: %s\n", progname,
-                               name, strerror(errno));
-                       goto error;
-               }
+
+       if (stbuf.st_dev != stbuf_v.st_dev) {
+               error_loc = "stat source and target file";
+               rc = EXDEV;
+               goto out;
        }
 
        }
 
-       if (migration_flags & MIGRATION_NONBLOCK && file_lease_supported) {
-               rc = migrate_nonblock(fd, fdv, &st, buf_size, name);
-               if (rc == 0) {
-                       have_lease_rdlck = false;
-                       fdv = -1; /* The volatile file is closed as we put the
-                                  * lease in non-blocking mode. */
-               }
-       } else {
-               /* Blocking mode (forced if servers do not support file lease).
-                * It is also the default mode, since we cannot distinguish
-                * between a broken lease and a server that does not support
-                * atomic swap/close (LU-6785) */
-               rc = migrate_block(fd, fdv, &st, buf_size, name);
+       /* mirrors should be of the same size */
+       if (stbuf.st_size != stbuf_v.st_size) {
+               error_loc = "file sizes don't match";
+               rc = -EINVAL;
+               goto out;
        }
 
        }
 
-error:
-       if (have_lease_rdlck)
-               llapi_lease_put(fd);
+       rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
+       if (rc < 0) {
+               error_loc = "cannot get lease";
+               goto out;
+       }
+
+       /* Make sure we keep original atime/mtime values */
+       rc = migrate_copy_timestamps(fd, fdv);
+
+       /* Atomically put lease, swap layouts and close.
+        * for a migration we need to check data version on file did
+        * not change. */
+       rc = llapi_fswap_layouts(fd, fdv, 0, 0, MERGE_LAYOUTS_CLOSE);
+       if (rc < 0) {
+               error_loc = "cannot swap layout";
+               goto out;
+       }
 
 
+out:
        if (fd >= 0)
                close(fd);
 
        if (fdv >= 0)
                close(fdv);
 
        if (fd >= 0)
                close(fd);
 
        if (fdv >= 0)
                close(fdv);
 
-free:
-       if (lum)
-               free(lum);
-
+       if (rc < 0)
+               fprintf(stderr, "error: %s: %s: %s: %s\n",
+                       progname, fname, error_loc, strerror(-rc));
        return rc;
 }
 
        return rc;
 }
 
@@ -1365,6 +1426,8 @@ static int lfs_setstripe(int argc, char **argv)
        int                              comp_add = 0;
        __u32                            comp_id = 0;
        struct llapi_layout             *layout = NULL;
        int                              comp_add = 0;
        __u32                            comp_id = 0;
        struct llapi_layout             *layout = NULL;
+       bool                             create_mirror = false;
+       const char                      *mirror_file = NULL;
 
        struct option long_opts[] = {
                /* --block is only valid in migrate mode */
 
        struct option long_opts[] = {
                /* --block is only valid in migrate mode */
@@ -1404,6 +1467,7 @@ static int lfs_setstripe(int argc, char **argv)
        { .val = 'm',   .name = "mdt",          .has_arg = required_argument},
        { .val = 'm',   .name = "mdt-index",    .has_arg = required_argument},
        { .val = 'm',   .name = "mdt_index",    .has_arg = required_argument},
        { .val = 'm',   .name = "mdt",          .has_arg = required_argument},
        { .val = 'm',   .name = "mdt-index",    .has_arg = required_argument},
        { .val = 'm',   .name = "mdt_index",    .has_arg = required_argument},
+       { .val = 'M',   .name = "mirror",       .has_arg = optional_argument},
        /* --non-block is only valid in migrate mode */
        { .val = 'n',   .name = "non-block",    .has_arg = no_argument},
        { .val = 'o',   .name = "ost",          .has_arg = required_argument},
        /* --non-block is only valid in migrate mode */
        { .val = 'n',   .name = "non-block",    .has_arg = no_argument},
        { .val = 'o',   .name = "ost",          .has_arg = required_argument},
@@ -1478,6 +1542,15 @@ static int lfs_setstripe(int argc, char **argv)
                        /* delete the default striping pattern */
                        delete = 1;
                        break;
                        /* delete the default striping pattern */
                        delete = 1;
                        break;
+               case 'M':
+                       if (create_mirror) {
+                               fprintf(stderr, "error: %s: --mirror can only "
+                                       "be specfied once", argv[0]);
+                               goto error;
+                       }
+                       create_mirror = true;
+                       mirror_file = optarg;
+                       break;
                case 'E':
                        if (lsa.lsa_comp_end != 0) {
                                result = comp_args_to_layout(&layout, &lsa);
                case 'E':
                        if (lsa.lsa_comp_end != 0) {
                                result = comp_args_to_layout(&layout, &lsa);
@@ -1614,21 +1687,34 @@ static int lfs_setstripe(int argc, char **argv)
 
        fname = argv[optind];
 
 
        fname = argv[optind];
 
-       if (lsa.lsa_comp_end != 0) {
-               result = comp_args_to_layout(&layout, &lsa);
-               if (result) {
-                       fprintf(stderr, "%s %s: invalid component layout\n",
-                               progname, argv[0]);
-                       goto usage_error;
-               }
-       }
-
        if (optind == argc) {
                fprintf(stderr, "%s %s: FILE must be specified\n",
                        progname, argv[0]);
                goto usage_error;
        }
 
        if (optind == argc) {
                fprintf(stderr, "%s %s: FILE must be specified\n",
                        progname, argv[0]);
                goto usage_error;
        }
 
+       if (create_mirror) {
+               if (!comp_add) {
+                       fprintf(stderr, "error: %s: --component-add must be "
+                               "specified with --mirror option\n", argv[0]);
+                       goto error;
+               }
+               if (lsa.lsa_comp_end == 0)
+                       lsa.lsa_comp_end = LUSTRE_EOF;
+               if (lsa.lsa_comp_end != LUSTRE_EOF) {
+                       fprintf(stderr,
+                               "error: %s: creating non-eof ending mirror\n",
+                               argv[0]);
+                       goto error;
+               }
+       }
+
+       if (lsa.lsa_comp_end != 0) {
+               result = comp_args_to_layout(&layout, &lsa);
+               if (result)
+                       goto error;
+       }
+
        /* Only LCME_FL_INIT flags is used in PFL, and it shouldn't be
         * altered by user space tool, so we don't need to support the
         * --component-set for this moment. */
        /* Only LCME_FL_INIT flags is used in PFL, and it shouldn't be
         * altered by user space tool, so we don't need to support the
         * --component-set for this moment. */
@@ -1687,11 +1773,13 @@ static int lfs_setstripe(int argc, char **argv)
                                progname, argv[0]);
                        goto usage_error;
                }
                                progname, argv[0]);
                        goto usage_error;
                }
-               result = adjust_first_extent(fname, layout);
-               if (result == -ENODATA)
-                       comp_add = 0;
-               else if (result != 0)
-                       goto error;
+               if (!create_mirror) {
+                       result = adjust_first_extent(fname, layout);
+                       if (result == -ENODATA)
+                               comp_add = 0;
+                       else if (result != 0)
+                               goto error;
+               }
        }
 
        if (mdt_idx_arg != NULL && optind > 3) {
        }
 
        if (mdt_idx_arg != NULL && optind > 3) {
@@ -1772,7 +1860,11 @@ static int lfs_setstripe(int argc, char **argv)
                        result = lfs_component_del(fname, comp_id,
                                                   lsa.lsa_comp_flags);
                } else if (comp_add != 0) {
                        result = lfs_component_del(fname, comp_id,
                                                   lsa.lsa_comp_flags);
                } else if (comp_add != 0) {
-                       result = lfs_component_add(fname, layout);
+                       if (create_mirror)
+                               result = lfs_create_mirror(fname, layout,
+                                                          mirror_file);
+                       else
+                               result = lfs_component_add(fname, layout);
                } else if (layout != NULL) {
                        result = lfs_component_create(fname, O_CREAT | O_WRONLY,
                                                      0644, layout);
                } else if (layout != NULL) {
                        result = lfs_component_create(fname, O_CREAT | O_WRONLY,
                                                      0644, layout);
index e6008a9..ccf9e8c 100644 (file)
@@ -2591,24 +2591,33 @@ static void lov_dump_comp_v1_header(struct find_param *param, char *path,
 
        if (verbose & VERBOSE_DETAIL) {
                llapi_printf(LLAPI_MSG_NORMAL, "composite_header:\n");
 
        if (verbose & VERBOSE_DETAIL) {
                llapi_printf(LLAPI_MSG_NORMAL, "composite_header:\n");
-               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic:       0x%08X\n",
+               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic:         0x%08X\n",
                             " ", comp_v1->lcm_magic);
                             " ", comp_v1->lcm_magic);
-               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size:        %u\n",
+               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size:          %u\n",
                             " ", comp_v1->lcm_size);
                             " ", comp_v1->lcm_size);
-               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_flags:       %u\n",
+               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_flags:         %u\n",
                             " ", comp_v1->lcm_flags);
        }
 
        if (verbose & VERBOSE_GENERATION) {
                if (verbose & ~VERBOSE_GENERATION)
                             " ", comp_v1->lcm_flags);
        }
 
        if (verbose & VERBOSE_GENERATION) {
                if (verbose & ~VERBOSE_GENERATION)
-                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen:  ",
+                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen:    ",
                                     " ");
                llapi_printf(LLAPI_MSG_NORMAL, "%u\n", comp_v1->lcm_layout_gen);
        }
 
                                     " ");
                llapi_printf(LLAPI_MSG_NORMAL, "%u\n", comp_v1->lcm_layout_gen);
        }
 
+       if (verbose & VERBOSE_MIRROR_COUNT) {
+               if (verbose & ~VERBOSE_MIRROR_COUNT)
+                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_mirror_count: ",
+                                    " ");
+               llapi_printf(LLAPI_MSG_NORMAL, "%u\n",
+                            comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ?
+                            comp_v1->lcm_mirror_count + 1 : 1);
+       }
+
        if (verbose & VERBOSE_COMP_COUNT) {
                if (verbose & ~VERBOSE_COMP_COUNT)
        if (verbose & VERBOSE_COMP_COUNT) {
                if (verbose & ~VERBOSE_COMP_COUNT)
-                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count: ",
+                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count:   ",
                                     " ");
                llapi_printf(LLAPI_MSG_NORMAL, "%u\n",
                             comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ?
                                     " ");
                llapi_printf(LLAPI_MSG_NORMAL, "%u\n",
                             comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ?
index f6e477c..5f474b8 100644 (file)
@@ -515,8 +515,9 @@ llapi_layout_to_lum(const struct llapi_layout *layout)
                comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
                comp_v1->lcm_size = lum_size;
                comp_v1->lcm_layout_gen = 0;
                comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
                comp_v1->lcm_size = lum_size;
                comp_v1->lcm_layout_gen = 0;
-               comp_v1->lcm_flags = 0;
+               comp_v1->lcm_flags = layout->llot_flags;
                comp_v1->lcm_entry_count = comp_cnt;
                comp_v1->lcm_entry_count = comp_cnt;
+               comp_v1->lcm_mirror_count = 0;
                offset += lum_size;
        }
 
                offset += lum_size;
        }
 
@@ -1511,6 +1512,20 @@ int llapi_layout_file_create(const char *path, int open_flags, int mode,
 }
 
 /**
 }
 
 /**
+ * Set flags to the header of a component layout.
+ *
+ * The value is only recorded in \a layout here; llapi_layout_to_lum()
+ * copies it into the lcm_flags header field when it fills in a
+ * LOV_USER_MAGIC_COMP_V1 header.
+ *
+ * \param[in] layout   the layout whose header flags are updated
+ * \param[in] flags    new flags value, replacing whatever was stored before
+ *
+ * \retval 0   success
+ * \retval -1  \a layout is NULL or does not carry LLAPI_LAYOUT_MAGIC;
+ *             errno is set to EINVAL
+ */
+int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
+{
+       /* Check for NULL before reading llot_magic so that a bad argument
+        * is reported through errno instead of crashing the caller. */
+       if (!layout || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       layout->llot_flags = flags;
+       return 0;
+}
+
+/**
  * Fetch the start and end offset of the current layout component.
  *
  * \param[in] layout   the layout component
  * Fetch the start and end offset of the current layout component.
  *
  * \param[in] layout   the layout component
index 5791315..3c42583 100644 (file)
@@ -782,6 +782,7 @@ check_lov_comp_md_entry_v1(void)
        CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding);
 
        CHECK_VALUE_X(LCME_FL_INIT);
        CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding);
 
        CHECK_VALUE_X(LCME_FL_INIT);
+       CHECK_VALUE_X(LCME_FL_NEG);
 }
 
 static void
 }
 
 static void
@@ -794,11 +795,17 @@ check_lov_comp_md_v1(void)
        CHECK_MEMBER(lov_comp_md_v1, lcm_layout_gen);
        CHECK_MEMBER(lov_comp_md_v1, lcm_flags);
        CHECK_MEMBER(lov_comp_md_v1, lcm_entry_count);
        CHECK_MEMBER(lov_comp_md_v1, lcm_layout_gen);
        CHECK_MEMBER(lov_comp_md_v1, lcm_flags);
        CHECK_MEMBER(lov_comp_md_v1, lcm_entry_count);
+       CHECK_MEMBER(lov_comp_md_v1, lcm_mirror_count);
        CHECK_MEMBER(lov_comp_md_v1, lcm_padding1);
        CHECK_MEMBER(lov_comp_md_v1, lcm_padding2);
        CHECK_MEMBER(lov_comp_md_v1, lcm_entries[0]);
 
        CHECK_CDEFINE(LOV_MAGIC_COMP_V1);
        CHECK_MEMBER(lov_comp_md_v1, lcm_padding1);
        CHECK_MEMBER(lov_comp_md_v1, lcm_padding2);
        CHECK_MEMBER(lov_comp_md_v1, lcm_entries[0]);
 
        CHECK_CDEFINE(LOV_MAGIC_COMP_V1);
+
+       CHECK_VALUE(LCM_FL_NOT_FLR);
+       CHECK_VALUE(LCM_FL_RDONLY);
+       CHECK_VALUE(LCM_FL_WRITE_PENDING);
+       CHECK_VALUE(LCM_FL_SYNC_PENDING);
 }
 
 static void
 }
 
 static void
index f62a0b0..54fdbf5 100644 (file)
@@ -1724,6 +1724,8 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
        LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
                (unsigned)LCME_FL_INIT);
                 (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
        LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
                (unsigned)LCME_FL_INIT);
+       LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n",
+               (unsigned)LCME_FL_NEG);
 
        /* Checks for struct lov_comp_md_v1 */
        LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n",
 
        /* Checks for struct lov_comp_md_v1 */
        LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n",
@@ -1748,9 +1750,13 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count));
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count));
-       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 16, "found %lld\n",
+       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_mirror_count) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_v1, lcm_mirror_count));
+       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count));
+       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 18, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1));
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1));
-       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 8, "found %lld\n",
+       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 6, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1));
        LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2));
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1));
        LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2));
@@ -1761,6 +1767,14 @@ void lustre_assert_wire_constants(void)
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]));
        CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]));
        CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0));
+       LASSERTF(LCM_FL_NOT_FLR == 0, "found %lld\n",
+                (long long)LCM_FL_NOT_FLR);
+       LASSERTF(LCM_FL_RDONLY == 1, "found %lld\n",
+                (long long)LCM_FL_RDONLY);
+       LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n",
+                (long long)LCM_FL_WRITE_PENDING);
+       LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n",
+                (long long)LCM_FL_SYNC_PENDING);
 
        /* Checks for struct lmv_mds_md_v1 */
        LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
 
        /* Checks for struct lmv_mds_md_v1 */
        LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",