Whamcloud - gitweb
LU-9771 flr: Merge branch 'flr' 50/30250/2
authorJinshan Xiong <jinshan.xiong@intel.com>
Tue, 28 Nov 2017 16:56:24 +0000 (16:56 +0000)
committerJinshan Xiong <jinshan.xiong@intel.com>
Tue, 28 Nov 2017 16:58:54 +0000 (16:58 +0000)
Merge remote-tracking branch 'origin/flr'.

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Change-Id: Idee9297fbcab2bea3bd5987c94e4b4e79c49b3b6

15 files changed:
1  2 
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/file.c
lustre/lod/lod_object.c
lustre/mdc/mdc_lib.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/osc/osc_request.c
lustre/tests/conf-sanity.sh
lustre/tests/sanity-hsm.sh
lustre/tests/sanity.sh
lustre/tests/test-framework.sh
lustre/utils/Makefile.am
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/wiretest.c

@@@ -193,6 -193,9 +193,9 @@@ struct filter_fid_old 
  struct filter_fid {
        struct lu_fid           ff_parent;
        struct ost_layout       ff_layout;
+       __u32                   ff_layout_version;
+       __u32                   ff_range; /* range of layout versions for
+                                          * which writes are allowed */
  } __attribute__((packed));
  
  /* Userspace should treat lu_fid as opaque, and only use the following methods
@@@ -274,6 -277,17 +277,17 @@@ struct lustre_ost_attrs 
   */
  #define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
  
+ enum {
+       LSOM_FL_VALID = 1 << 0, /* bit 0; presumably a flag stored in
+                                * lustre_som_attrs::lsa_valid -
+                                * TODO confirm against the users */
+ };
+ struct lustre_som_attrs {
+       __u16   lsa_valid;      /* presumably LSOM_FL_* bits - TODO confirm */
+       __u16   lsa_reserved[3]; /* together with lsa_valid this occupies the
+                                 * 8 bytes ahead of the 64-bit fields */
+       __u64   lsa_size;
+       __u64   lsa_blocks;
+ };
  /**
   * OST object IDentifier.
   */
@@@ -301,6 -315,31 +315,31 @@@ struct ll_futimes_3 
  };
  
  /*
+  * Maximum number of mirrors currently implemented.
+  */
+ #define LUSTRE_MIRROR_COUNT_MAX               16
+ /* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl.
+  *
+  * ll_file_set_lease() switches on ll_ioc_lease::lil_mode using these
+  * values; any other value is rejected there with -EINVAL. */
+ enum ll_lease_mode {
+       LL_LEASE_RDLCK  = 0x01, /* needs the file opened with FMODE_READ */
+       LL_LEASE_WRLCK  = 0x02, /* needs the file opened with FMODE_WRITE */
+       LL_LEASE_UNLCK  = 0x04, /* handed to ll_file_unlock_lease() */
+ };
+ enum ll_lease_flags {        /* bits tested in ll_ioc_lease::lil_flags */
+       LL_LEASE_RESYNC         = 0x1, /* ll_file_set_lease(): open the lease
+                                       * with MDS_OPEN_RESYNC and start a
+                                       * file resync */
+       LL_LEASE_RESYNC_DONE    = 0x2, /* ll_file_unlock_lease(): close with
+                                       * the MDS_CLOSE_RESYNC_DONE bias */
+ };
+ #define IOC_IDS_MAX   4096    /* ll_file_unlock_lease() rejects a lil_count above this */
+ struct ll_ioc_lease {
+       __u32           lil_mode;       /* enum ll_lease_mode */
+       __u32           lil_flags;      /* enum ll_lease_flags */
+       __u32           lil_count;      /* number of entries in lil_ids[] */
+       __u32           lil_ids[0];     /* trailing variable-length array */
+ };
+ /*
   * The ioctl naming rules:
   * LL_*     - works on the currently opened filehandle instead of parent dir
   * *_OBD_*  - gets data for both OSC or MDC (LOV, LMV indirectly)
  #define LL_IOC_GET_CONNECT_FLAGS        _IOWR('f', 174, __u64 *)
  #define LL_IOC_GET_MDTIDX               _IOR ('f', 175, int)
  #define LL_IOC_FUTIMES_3              _IOWR('f', 176, struct ll_futimes_3)
+ #define LL_IOC_FLR_SET_MIRROR         _IOW ('f', 177, long)
  /*    lustre_ioctl.h                  177-210 */
  #define LL_IOC_HSM_STATE_GET          _IOR('f', 211, struct hsm_user_state)
  #define LL_IOC_HSM_STATE_SET          _IOW('f', 212, struct hsm_state_set)
  #define LL_IOC_LMV_SETSTRIPE          _IOWR('f', 240, struct lmv_user_md)
  #define LL_IOC_LMV_GETSTRIPE          _IOWR('f', 241, struct lmv_user_md)
  #define LL_IOC_REMOVE_ENTRY           _IOWR('f', 242, __u64)
- #define LL_IOC_SET_LEASE              _IOWR('f', 243, long)
+ #define LL_IOC_SET_LEASE              _IOWR('f', 243, struct ll_ioc_lease)
+ #define LL_IOC_SET_LEASE_OLD          _IOWR('f', 243, long)
  #define LL_IOC_GET_LEASE              _IO('f', 244)
  #define LL_IOC_HSM_IMPORT             _IOWR('f', 245, struct hsm_user_import)
  #define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md)
@@@ -383,13 -424,6 +424,6 @@@ struct fsxattr 
  #define LL_IOC_FSSETXATTR             FS_IOC_FSSETXATTR
  
  
- /* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
- enum ll_lease_type {
-       LL_LEASE_RDLCK  = 0x1,
-       LL_LEASE_WRLCK  = 0x2,
-       LL_LEASE_UNLCK  = 0x4,
- };
  #define LL_STATFS_LMV         1
  #define LL_STATFS_LOV         2
  #define LL_STATFS_NODELAY     4
  #define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0    /* for specific OSTs */
  #define LOV_USER_MAGIC_COMP_V1        0x0BD60BD0
  
 -#define LMV_USER_MAGIC    0x0CD30CD0    /*default lmv magic*/
 +#define LMV_USER_MAGIC                0x0CD30CD0    /* default lmv magic */
 +#define LMV_USER_MAGIC_V0     0x0CD20CD0    /* old default lmv magic*/
  
  #define LOV_PATTERN_NONE      0x000
  #define LOV_PATTERN_RAID0     0x001
@@@ -514,7 -547,7 +548,7 @@@ struct lu_extent 
        __u64   e_end;
  };
  
- #define DEXT "[ %#llx , %#llx )"
+ #define DEXT "[%#llx, %#llx)"
  #define PEXT(ext) (ext)->e_start, (ext)->e_end
  
  static inline bool lu_extent_is_overlapped(struct lu_extent *e1,
        return e1->e_start < e2->e_end && e2->e_start < e1->e_end;
  }
  
+ static inline bool lu_extent_is_whole(struct lu_extent *e)
+ {     /* True iff the extent starts at offset 0 and ends at LUSTRE_EOF. */
+       return e->e_start == 0 && e->e_end == LUSTRE_EOF;
+ }
  enum lov_comp_md_entry_flags {
        LCME_FL_PRIMARY = 0x00000001,   /* Not used */
        LCME_FL_STALE   = 0x00000002,   /* Not used */
  
  #define LCME_KNOWN_FLAGS      (LCME_FL_NEG | LCME_FL_INIT)
  
+ /* the highest bit in obdo::o_layout_version is used to mark if the file is
+  * being resynced. */
+ #define LU_LAYOUT_RESYNC      LCME_FL_NEG
  /* lcme_id can be specified as certain flags, and the first
   * bit of lcme_id is used to indicate that the ID is representing
   * certain LCME_FL_* but not a real ID. Which implies we can have
@@@ -558,7 -600,33 +601,33 @@@ struct lov_comp_md_entry_v1 
        __u64                   lcme_padding[2];
  } __attribute__((packed));
  
- enum lov_comp_md_flags;
+ #define SEQ_ID_MAX            0x0000FFFF
+ #define SEQ_ID_MASK           SEQ_ID_MAX
+ /* bit 30:16 of lcme_id is used to store mirror id */
+ #define MIRROR_ID_MASK                0x7FFF0000
+ #define MIRROR_ID_SHIFT               16
+ static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
+ {     /* Compose an id: mirror_id is shifted into the MIRROR_ID_MASK bits
+        * and OR-ed with seqid. The mask covers 15 bits (30:16) only, so
+        * bit 15 of mirror_id is discarded.
+        *
+        * NOTE(review): mirror_id is promoted to int before the shift; with
+        * a 32-bit int, values >= 0x8000 overflow the signed type (undefined
+        * in ISO C) before the mask is applied. Consider casting to __u32
+        * first - confirm with the callers' value range. */
+       return ((mirror_id << MIRROR_ID_SHIFT) & MIRROR_ID_MASK) | seqid;
+ }
+ static inline __u16 mirror_id_of(__u32 id)
+ {     /* Extract the mirror id, i.e. the MIRROR_ID_MASK bits of \a id
+        * shifted down by MIRROR_ID_SHIFT. */
+       return (id & MIRROR_ID_MASK) >> MIRROR_ID_SHIFT;
+ }
+ /**
+  * on-disk data for lcm_flags. Valid if lcm_magic is LOV_MAGIC_COMP_V1.
+  *
+  * The FLR state lives in the two lowest bits (LCM_FL_FLR_MASK) and is
+  * stored as one of the values 0..3 below, not as independent bit flags.
+  */
+ enum lov_comp_md_flags {
+       /* the least 2 bits are used by FLR to record file state */
+       LCM_FL_NOT_FLR          = 0,
+       LCM_FL_RDONLY           = 1,
+       LCM_FL_WRITE_PENDING    = 2,
+       LCM_FL_SYNC_PENDING     = 3,
+       LCM_FL_FLR_MASK         = 0x3,
+ };
  
  struct lov_comp_md_v1 {
        __u32   lcm_magic;      /* LOV_USER_MAGIC_COMP_V1 */
        __u32   lcm_layout_gen;
        __u16   lcm_flags;      /* see enum lov_comp_md_flags */
        __u16   lcm_entry_count;
-       __u64   lcm_padding1;
+       /* lcm_mirror_count stores the number of actual mirrors minus 1,
+        * so that non-flr files will have value 0 meaning 1 mirror. */
+       __u16   lcm_mirror_count;
+       __u16   lcm_padding1[3]; /* with lcm_mirror_count, fills the 8 bytes
+                                 * of the former __u64 lcm_padding1 */
        __u64   lcm_padding2;
        struct lov_comp_md_entry_v1 lcm_entries[0];
  } __attribute__((packed));
  
+ /*
+  * Maximum number of mirrors Lustre can support.
+  */
+ #define LUSTRE_MIRROR_COUNT_MAX               16
  static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
  {
        if (stripes == (__u16)-1)
@@@ -858,6 -934,8 +935,8 @@@ struct if_quotactl 
  #define SWAP_LAYOUTS_KEEP_MTIME               (1 << 2)
  #define SWAP_LAYOUTS_KEEP_ATIME               (1 << 3)
  #define SWAP_LAYOUTS_CLOSE            (1 << 4)
+ #define MERGE_LAYOUTS_CLOSE           (1 << 5)
+ #define INTENT_LAYOUTS_CLOSE  (SWAP_LAYOUTS_CLOSE | MERGE_LAYOUTS_CLOSE)
  
  /* Swap XATTR_NAME_HSM as well, only on the MDT so far */
  #define SWAP_LAYOUTS_MDS_HSM          (1 << 31)
@@@ -894,6 -972,8 +973,8 @@@ enum changelog_rec_type 
        CL_CTIME    = 18,
        CL_ATIME    = 19,
        CL_MIGRATE  = 20,
+       CL_FLRW     = 21, /* FLR: file was firstly written */
+       CL_RESYNC   = 22, /* FLR: file was resync-ed */
        CL_LAST
  };
  
@@@ -901,7 -981,8 +982,8 @@@ static inline const char *changelog_typ
        static const char *changelog_str[] = {
                "MARK",  "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
                "RMDIR", "RENME", "RNMTO", "OPEN",  "CLOSE", "LYOUT", "TRUNC",
-               "SATTR", "XATTR", "HSM",   "MTIME", "CTIME", "ATIME", "MIGRT"
+               "SATTR", "XATTR", "HSM",   "MTIME", "CTIME", "ATIME", "MIGRT",
+               "FLRW",  "RESYNC",
        };
  
        if (type >= 0 && type < CL_LAST)
@@@ -1181,11 -1262,15 +1263,15 @@@ enum changelog_message_type 
  /********* Misc **********/
  
  struct ioc_data_version { /* the two __u32 fields below take the place of
                             * the former __u64 idv_flags */
-       __u64 idv_version;
-       __u64 idv_flags;     /* See LL_DV_xxx */
+       __u64   idv_version;    /* filled in by ll_ioc_data_version() */
+       __u32   idv_layout_version; /* FLR: layout version for OST objects */
+       __u32   idv_flags;      /* enum ioc_data_version_flags */
+ };
+ enum ioc_data_version_flags { /* bit flags for ioc_data_version::idv_flags */
+       LL_DV_RD_FLUSH  = (1 << 0), /* Flush dirty pages from clients */
+       LL_DV_WR_FLUSH  = (1 << 1), /* Flush all caching pages from clients */
  };
- #define LL_DV_RD_FLUSH (1 << 0) /* Flush dirty pages from clients */
- #define LL_DV_WR_FLUSH (1 << 1) /* Flush all caching pages from clients */
  
  #ifndef offsetof
  #define offsetof(typ, memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
diff --combined lustre/llite/file.c
@@@ -144,14 -144,34 +144,34 @@@ static int ll_close_inode_openhandle(st
  
        ll_prepare_close(inode, op_data, och);
        switch (bias) {
+       case MDS_CLOSE_LAYOUT_MERGE:
+               /* merge blocks from the victim inode */
+               op_data->op_attr_blocks += ((struct inode *)data)->i_blocks;
+               op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
        case MDS_CLOSE_LAYOUT_SWAP:
                LASSERT(data != NULL);
-               op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
+               op_data->op_bias |= bias;
                op_data->op_data_version = 0;
                op_data->op_lease_handle = och->och_lease_handle;
                op_data->op_fid2 = *ll_inode2fid(data);
                break;
  
+       case MDS_CLOSE_RESYNC_DONE: {
+               struct ll_ioc_lease *ioc = data;
+               LASSERT(data != NULL);
+               op_data->op_attr_blocks +=
+                       ioc->lil_count * op_data->op_attr_blocks;
+               op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+               op_data->op_bias |= MDS_CLOSE_RESYNC_DONE;
+               op_data->op_lease_handle = och->och_lease_handle;
+               op_data->op_data = &ioc->lil_ids[0];
+               op_data->op_data_size =
+                       ioc->lil_count * sizeof(ioc->lil_ids[0]);
+               break;
+       }
        case MDS_HSM_RELEASE:
                LASSERT(data != NULL);
                op_data->op_bias |= MDS_HSM_RELEASE;
                CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
                       md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
  
-       if (rc == 0 &&
-           op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
+       if (rc == 0 && op_data->op_bias & bias) {
                struct mdt_body *body;
  
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
@@@ -914,10 -933,12 +933,12 @@@ static int ll_check_swap_layouts_validi
  }
  
  static int ll_swap_layouts_close(struct obd_client_handle *och,
-                                struct inode *inode, struct inode *inode2)
+                                struct inode *inode, struct inode *inode2,
+                                int intent)
  {
        const struct lu_fid     *fid1 = ll_inode2fid(inode);
        const struct lu_fid     *fid2;
+       enum mds_op_bias         bias;
        int                      rc;
        ENTRY;
  
        if (rc == 0)
                GOTO(out_free_och, rc = -EINVAL);
  
-       /* Close the file and swap layouts between inode & inode2.
+       switch (intent) {
+       case SWAP_LAYOUTS_CLOSE:
+               bias = MDS_CLOSE_LAYOUT_SWAP;
+               break;
+       case MERGE_LAYOUTS_CLOSE:
+               bias = MDS_CLOSE_LAYOUT_MERGE;
+               break;
+       default:
+               GOTO(out_free_och, rc = -EOPNOTSUPP);
+       }
+       /* Close the file and {swap,merge} layouts between inode & inode2.
         * NB: lease lock handle is released in mdc_close_layout_swap_pack()
         * because we still need it to pack l_remote_handle to MDT. */
-       rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
-                                      inode2);
+       rc = ll_close_inode_openhandle(inode, och, bias, inode2);
  
        och = NULL; /* freed in ll_close_inode_openhandle() */
  
@@@ -954,8 -985,10 +985,10 @@@ out_free_och
   * Release lease and close the file.
   * It will check if the lease has ever broken.
   */
- static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
-                         bool *lease_broken)
+ static int ll_lease_close_intent(struct obd_client_handle *och,
+                                struct inode *inode,
+                                bool *lease_broken, enum mds_op_bias bias,
+                                void *data)
  {
        struct ldlm_lock *lock;
        bool cancelled = true;
                LDLM_LOCK_PUT(lock);
        }
  
-       CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
-              PFID(&ll_i2info(inode)->lli_fid), cancelled);
-       if (!cancelled)
-               ldlm_cli_cancel(&och->och_lease_handle, 0);
+       CDEBUG(D_INODE, "lease for "DFID" broken? %d, bias: %x\n",
+              PFID(&ll_i2info(inode)->lli_fid), cancelled, bias);
  
        if (lease_broken != NULL)
                *lease_broken = cancelled;
  
-       rc = ll_close_inode_openhandle(inode, och, 0, NULL);
+       if (!cancelled && !bias)
+               ldlm_cli_cancel(&och->och_lease_handle, 0);
+       if (cancelled) { /* no need to excute intent */
+               bias = 0;
+               data = NULL;
+       }
+       rc = ll_close_inode_openhandle(inode, och, bias, data);
        RETURN(rc);
  }
  
+ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
+                         bool *lease_broken)
+ {     /* Plain lease close: forwards to ll_lease_close_intent() with no
+        * bias (0) and no intent data (NULL); returns its result. */
+       return ll_lease_close_intent(och, inode, lease_broken, 0, NULL);
+ }
+ /**
+  * After lease is taken, send the RPC MDS_REINT_RESYNC to the MDT
+  *
+  * The client page cache is flushed first (ll_data_version() with
+  * LL_DV_WR_FLUSH), then md_file_resync() is called with op_handle set to
+  * the lease handle.
+  *
+  * @param och   open handle whose och_lease_handle identifies the lease
+  * @param inode inode of the file being resynced
+  *
+  * @return 0 on success, otherwise the error from ll_prep_md_op_data(),
+  *         ll_data_version() or md_file_resync()
+  */
+ static int ll_lease_file_resync(struct obd_client_handle *och,
+                               struct inode *inode)
+ {
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+       struct md_op_data *op_data;
+       __u64 data_version_unused;
+       int rc;
+       ENTRY;
+       op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+                                    LUSTRE_OPC_ANY, NULL);
+       if (IS_ERR(op_data))
+               RETURN(PTR_ERR(op_data));
+       /* before starting file resync, it's necessary to clean up the page
+        * cache in client memory, otherwise once the layout version is
+        * increased, writing back cached data will be denied by the OSTs.
+        * Only the flush side effect is wanted; the version is discarded. */
+       rc = ll_data_version(inode, &data_version_unused, LL_DV_WR_FLUSH);
+       if (rc)
+               GOTO(out, rc);
+       op_data->op_handle = och->och_lease_handle;
+       rc = md_file_resync(sbi->ll_md_exp, op_data);
+       if (rc)
+               GOTO(out, rc);
+       EXIT;
+ out:  /* reached on success and on failure: op_data is always released */
+       ll_finish_md_op_data(op_data);
+       return rc;
+ }
  int ll_merge_attr(const struct lu_env *env, struct inode *inode)
  {
        struct ll_inode_info *lli = ll_i2info(inode);
@@@ -1051,6 -1130,34 +1130,34 @@@ out_size_unlock
        RETURN(rc);
  }
  
+ /**
+  * Set designated mirror for I/O.
+  *
+  * So far only read, write, and truncate support issuing I/O to a
+  * designated mirror.
+  *
+  * ci_layout_version is always reset to 0 first. When the file descriptor
+  * has a designated mirror (fd_designated_mirror > 0), the mirror id and
+  * fd_layout_version are copied into \a io and both ci_ndelay and ci_pio
+  * are cleared.
+  *
+  * @param io   I/O descriptor to set up
+  * @param file open file whose private data carries the mirror selection
+  */
+ void ll_io_set_mirror(struct cl_io *io, const struct file *file)
+ {
+       struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+       /* clear layout version for generic(non-resync) I/O in case it carries
+        * stale layout version due to I/O restart */
+       io->ci_layout_version = 0;
+       /* FLR: disable non-delay for designated mirror I/O because obviously
+        * only one mirror is available */
+       if (fd->fd_designated_mirror > 0) {
+               io->ci_ndelay = 0;
+               io->ci_designated_mirror = fd->fd_designated_mirror;
+               io->ci_layout_version = fd->fd_layout_version;
+               io->ci_pio = 0; /* doesn't have a mechanism to pass mirror
+                                * io to ptasks */
+       }
+       CDEBUG(D_VFSTRACE, "%s: desiginated mirror: %d\n",
+              file->f_path.dentry->d_name.name, io->ci_designated_mirror);
+ }
  static bool file_is_noatime(const struct file *file)
  {
        const struct vfsmount *mnt = file->f_path.mnt;
@@@ -1111,6 -1218,12 +1218,12 @@@ static void ll_io_init(struct cl_io *io
                io->ci_pio = !io->u.ci_rw.rw_append;
        else
                io->ci_pio = 0;
+       /* FLR: only use non-delay I/O for read as there is only one
+        * available mirror for write. */
+       io->ci_ndelay = !(iot == CIT_WRITE);
+       ll_io_set_mirror(io, file);
  }
  
  static int ll_file_io_ptask(struct cfs_ptask *ptask)
        __u16 refcheck;
        ENTRY;
  
-       env = cl_env_get(&refcheck);
-       if (IS_ERR(env))
-               RETURN(PTR_ERR(env));
        CDEBUG(D_VFSTRACE, "%s: %s range: [%llu, %llu)\n",
                file_dentry(file)->d_name.name,
                pt->cip_iot == CIT_READ ? "read" : "write",
                pos, pos + pt->cip_count);
  
- restart:
+       env = cl_env_get(&refcheck);
+       if (IS_ERR(env))
+               RETURN(PTR_ERR(env));
        io = vvp_env_thread_io(env);
        ll_io_init(io, file, pt->cip_iot);
        io->u.ci_rw.rw_iter = pt->cip_iter;
        }
  
        cl_io_fini(env, io);
+       cl_env_put(env, &refcheck);
  
-       if ((rc == 0 || rc == -ENODATA) &&
-           pt->cip_result < pt->cip_count &&
-           io->ci_need_restart) {
-               CDEBUG(D_VFSTRACE,
-                       "%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n",
-                       file_dentry(file)->d_name.name,
-                       pt->cip_iot == CIT_READ ? "read" : "write",
-                       pos, pos + pt->cip_count - pt->cip_result,
-                       pt->cip_result, rc);
-               goto restart;
-       }
+       pt->cip_need_restart = io->ci_need_restart;
  
        CDEBUG(D_VFSTRACE, "%s: %s ret: %zd, rc: %d\n",
                file_dentry(file)->d_name.name,
                pt->cip_iot == CIT_READ ? "read" : "write",
                pt->cip_result, rc);
  
-       cl_env_put(env, &refcheck);
        RETURN(pt->cip_result > 0 ? 0 : rc);
  }
  
@@@ -1211,6 -1313,8 +1313,8 @@@ ll_file_io_generic(const struct lu_env 
        loff_t                  pos = *ppos;
        ssize_t                 result = 0;
        int                     rc = 0;
+       unsigned                retried = 0;
+       bool                    restarted = false;
  
        ENTRY;
  
@@@ -1224,9 -1328,10 +1328,10 @@@ restart
        if (args->via_io_subtype == IO_NORMAL) {
                io->u.ci_rw.rw_iter = *args->u.normal.via_iter;
                io->u.ci_rw.rw_iocb = *args->u.normal.via_iocb;
-       } else {
-               io->ci_pio = 0;
        }
+       if (args->via_io_subtype != IO_NORMAL || restarted)
+               io->ci_pio = 0;
+       io->ci_ndelay_tried = retried;
  
        if (cl_io_rw_init(env, io, iot, pos, count) == 0) {
                bool range_locked = false;
  out:
        cl_io_fini(env, io);
  
+       CDEBUG(D_VFSTRACE,
+              "%s: %d io complete with rc: %d, result: %zd, restart: %d\n",
+              file->f_path.dentry->d_name.name,
+              iot, rc, result, io->ci_need_restart);
        if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
                CDEBUG(D_VFSTRACE,
                        "%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n",
                        file_dentry(file)->d_name.name,
                        iot == CIT_READ ? "read" : "write",
                        pos, pos + count, result, rc);
+               /* preserve the tried count for FLR */
+               retried = io->ci_ndelay_tried;
+               restarted = true;
                goto restart;
        }
  
@@@ -1869,6 -1982,10 +1982,10 @@@ ll_get_grouplock(struct inode *inode, s
                struct cl_layout cl = {
                        .cl_is_composite = false,
                };
+               struct lu_extent ext = {
+                       .e_start = 0,
+                       .e_end = OBD_OBJECT_EOF,
+               };
  
                env = cl_env_get(&refcheck);
                if (IS_ERR(env))
  
                rc = cl_object_layout_get(env, obj, &cl);
                if (!rc && cl.cl_is_composite)
-                       rc = ll_layout_write_intent(inode, 0, OBD_OBJECT_EOF);
+                       rc = ll_layout_write_intent(inode, LAYOUT_INTENT_WRITE,
+                                                   &ext);
  
                cl_env_put(env, &refcheck);
                if (rc)
@@@ -2086,18 -2204,8 +2204,8 @@@ gf_free
        RETURN(rc);
  }
  
- /*
-  * Read the data_version for inode.
-  *
-  * This value is computed using stripe object version on OST.
-  * Version is computed using server side locking.
-  *
-  * @param flags if do sync on the OST side;
-  *            0: no sync
-  *            LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
-  *            LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
-  */
- int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
+ static int
+ ll_ioc_data_version(struct inode *inode, struct ioc_data_version *ioc)
  {
        struct cl_object *obj = ll_i2info(inode)->lli_clob;
        struct lu_env *env;
  
        ENTRY;
  
+       ioc->idv_version = 0;
+       ioc->idv_layout_version = UINT_MAX;
        /* If no file object initialized, we consider its version is 0. */
-       if (obj == NULL) {
-               *data_version = 0;
+       if (obj == NULL)
                RETURN(0);
-       }
  
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
        io = vvp_env_thread_io(env);
        io->ci_obj = obj;
        io->u.ci_data_version.dv_data_version = 0;
-       io->u.ci_data_version.dv_flags = flags;
+       io->u.ci_data_version.dv_layout_version = UINT_MAX;
+       io->u.ci_data_version.dv_flags = ioc->idv_flags;
  
  restart:
        if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
        else
                result = io->ci_result;
  
-       *data_version = io->u.ci_data_version.dv_data_version;
+       ioc->idv_version = io->u.ci_data_version.dv_data_version;
+       ioc->idv_layout_version = io->u.ci_data_version.dv_layout_version;
  
        cl_io_fini(env, io);
  
  }
  
  /*
+  * Read the data_version for inode.
+  *
+  * This value is computed using stripe object version on OST.
+  * Version is computed using server side locking.
+  *
+  * Thin wrapper around ll_ioc_data_version(): \a flags is passed through
+  * ioc_data_version::idv_flags and only idv_version is handed back.
+  *
+  * @param inode        inode to query
+  * @param data_version output; written only when the call succeeds
+  * @param flags if do sync on the OST side;
+  *            0: no sync
+  *            LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
+  *            LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
+  *
+  * @return the return code of ll_ioc_data_version()
+  */
+ int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
+ {
+       struct ioc_data_version ioc = { .idv_flags = flags };
+       int rc;
+       rc = ll_ioc_data_version(inode, &ioc);
+       if (!rc)
+               *data_version = ioc.idv_version;
+       return rc;
+ }
+ /*
   * Trigger a HSM release request for the provided inode.
   */
  int ll_hsm_release(struct inode *inode)
@@@ -2689,7 -2823,6 +2823,7 @@@ int ll_ioctl_fsgetxattr(struct inode *i
                           sizeof(fsxattr)))
                RETURN(-EFAULT);
  
 +      fsxattr.fsx_xflags = ll_inode_to_ext_flags(inode->i_flags);
        fsxattr.fsx_projid = ll_i2info(inode)->lli_projid;
        if (copy_to_user((struct fsxattr __user *)arg,
                         &fsxattr, sizeof(fsxattr)))
@@@ -2706,7 -2839,6 +2840,7 @@@ int ll_ioctl_fssetxattr(struct inode *i
        struct ptlrpc_request *req = NULL;
        int rc = 0;
        struct fsxattr fsxattr;
 +      struct cl_object *obj;
  
        /* only root could change project ID */
        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
                           sizeof(fsxattr)))
                GOTO(out_fsxattr1, rc = -EFAULT);
  
 +      op_data->op_attr_flags = fsxattr.fsx_xflags;
        op_data->op_projid = fsxattr.fsx_projid;
 -      op_data->op_attr.ia_valid |= MDS_ATTR_PROJID;
 +      op_data->op_attr.ia_valid |= (MDS_ATTR_PROJID | ATTR_ATTR_FLAG);
        rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data, NULL,
                        0, &req);
        ptlrpc_req_finished(req);
  
 +      obj = ll_i2info(inode)->lli_clob;
 +      if (obj) {
 +              struct iattr *attr;
 +
 +              inode->i_flags = ll_ext_to_inode_flags(fsxattr.fsx_xflags);
 +              OBD_ALLOC_PTR(attr);
 +              if (attr == NULL)
 +                      GOTO(out_fsxattr1, rc = -ENOMEM);
 +              attr->ia_valid = ATTR_ATTR_FLAG;
 +              rc = cl_setattr_ost(obj, attr, fsxattr.fsx_xflags);
 +
 +              OBD_FREE_PTR(attr);
 +      }
  out_fsxattr1:
        ll_finish_md_op_data(op_data);
        RETURN(rc);
+ }
+ static long ll_file_unlock_lease(struct file *file, struct ll_ioc_lease *ioc,
+                                unsigned long arg)
+ {     /* Release the lease attached to \a file (called by
+        * ll_file_set_lease() for LL_LEASE_UNLCK).
+        *
+        * file: open file whose private data holds fd_lease_och
+        * ioc:  lease request; lil_flags and lil_count are read here
+        * arg:  user-space address the request is copied from again when
+        *       LL_LEASE_RESYNC_DONE is set
+        *
+        * Returns ll_lease_type_from_fmode() of the released lease's mode
+        * (of 0 if the lease was found broken) on success, -ENOLCK when no
+        * lease is attached, -EINVAL/-ENOMEM/-EFAULT for a bad resync-done
+        * request, or the negative error of the close/release calls. */
+       struct inode            *inode = file_inode(file);
+       struct ll_file_data     *fd = LUSTRE_FPRIVATE(file);
+       struct ll_inode_info    *lli = ll_i2info(inode);
+       struct obd_client_handle *och = NULL;
+       bool lease_broken;
+       fmode_t fmode = 0;
+       enum mds_op_bias bias = 0;
+       void *data = NULL;
+       size_t data_size = 0;
+       long rc;
+       ENTRY;
+       mutex_lock(&lli->lli_och_mutex); /* detach the handle under the mutex */
+       if (fd->fd_lease_och != NULL) {
+               och = fd->fd_lease_och;
+               fd->fd_lease_och = NULL;
+       }
+       mutex_unlock(&lli->lli_och_mutex);
+       if (och == NULL)
+               GOTO(out, rc = -ENOLCK);
+       fmode = och->och_flags;
+       if (ioc->lil_flags & LL_LEASE_RESYNC_DONE) {
+               /* NOTE(review): the three error exits below jump to "out"
+                * after och was detached from fd->fd_lease_och, and "out"
+                * neither closes och nor puts it back - this looks like a
+                * leaked lease handle; confirm. */
+               if (ioc->lil_count > IOC_IDS_MAX)
+                       GOTO(out, rc = -EINVAL);
+               /* size of the header plus lil_count ids */
+               data_size = offsetof(typeof(*ioc), lil_ids[ioc->lil_count]);
+               OBD_ALLOC(data, data_size);
+               if (!data)
+                       GOTO(out, rc = -ENOMEM);
+               /* NOTE(review): the request is fetched from user space again
+                * here; the lil_count inside the copy is not re-checked
+                * against the value validated above, yet
+                * ll_close_inode_openhandle() sizes op_data from it -
+                * verify. */
+               if (copy_from_user(data, (void __user *)arg, data_size))
+                       GOTO(out, rc = -EFAULT);
+               bias = MDS_CLOSE_RESYNC_DONE;
+       }
+       rc = ll_lease_close_intent(och, inode, &lease_broken, bias, data);
+       if (rc < 0)
+               GOTO(out, rc);
+       rc = ll_lease_och_release(inode, file);
+       if (rc < 0)
+               GOTO(out, rc);
+       if (lease_broken)
+               fmode = 0;
+       EXIT;
+ out:
+       if (data)
+               OBD_FREE(data, data_size);
+       if (!rc) /* success: report the lease type instead of 0 */
+               rc = ll_lease_type_from_fmode(fmode);
+       RETURN(rc);
+ }
+ static long ll_file_set_lease(struct file *file, struct ll_ioc_lease *ioc,
+                             unsigned long arg)
+ {     /* Take a read or write lease on \a file, or release the current one.
+        *
+        * file: open file the lease is attached to
+        * ioc:  request; lil_mode selects the operation, lil_flags may carry
+        *       LL_LEASE_RESYNC
+        * arg:  original ioctl argument, only forwarded to
+        *       ll_file_unlock_lease()
+        *
+        * Returns 0 once the lease is attached, the result of
+        * ll_file_unlock_lease() for LL_LEASE_UNLCK, -EPERM when the file
+        * was not opened in the requested mode, -EINVAL for an unknown mode,
+        * -EBUSY when a lease is already attached, or the error of the
+        * lease open / resync / layout refresh calls. */
+       struct inode *inode = file_inode(file);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+       struct obd_client_handle *och = NULL;
+       __u64 open_flags = 0;
+       bool lease_broken; /* written by ll_lease_close() below, not read */
+       fmode_t fmode;
+       long rc;
+       ENTRY;
+       switch (ioc->lil_mode) {
+       case LL_LEASE_WRLCK:
+               if (!(file->f_mode & FMODE_WRITE))
+                       RETURN(-EPERM);
+               fmode = FMODE_WRITE;
+               break;
+       case LL_LEASE_RDLCK:
+               if (!(file->f_mode & FMODE_READ))
+                       RETURN(-EPERM);
+               fmode = FMODE_READ;
+               break;
+       case LL_LEASE_UNLCK:
+               RETURN(ll_file_unlock_lease(file, ioc, arg));
+       default:
+               RETURN(-EINVAL);
+       }
  
+       CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
+       /* apply for lease */
+       if (ioc->lil_flags & LL_LEASE_RESYNC)
+               open_flags = MDS_OPEN_RESYNC;
+       och = ll_lease_open(inode, file, fmode, open_flags);
+       if (IS_ERR(och))
+               RETURN(PTR_ERR(och));
  
+       if (ioc->lil_flags & LL_LEASE_RESYNC) {
+               /* resync: start the resync, then refresh the layout and keep
+                * its version in the file descriptor; on either failure the
+                * freshly opened lease is closed again */
+               rc = ll_lease_file_resync(och, inode);
+               if (rc) {
+                       ll_lease_close(och, inode, NULL);
+                       RETURN(rc);
+               }
+               rc = ll_layout_refresh(inode, &fd->fd_layout_version);
+               if (rc) {
+                       ll_lease_close(och, inode, NULL);
+                       RETURN(rc);
+               }
+       }
+       rc = 0;
+       mutex_lock(&lli->lli_och_mutex);
+       if (fd->fd_lease_och == NULL) { /* attach only if no lease is set */
+               fd->fd_lease_och = och;
+               och = NULL;
+       }
+       mutex_unlock(&lli->lli_och_mutex);
+       if (och != NULL) {
+               /* impossible now that only excl is supported for now */
+               ll_lease_close(och, inode, &lease_broken);
+               rc = -EBUSY;
+       }
+       RETURN(rc);
  }
  
  static long
@@@ -2799,6 -3044,7 +3060,7 @@@ ll_file_ioctl(struct file *file, unsign
        case LL_IOC_LOV_SWAP_LAYOUTS: {
                struct file *file2;
                struct lustre_swap_layouts lsl;
+               __u64 intent;
  
                if (copy_from_user(&lsl, (char __user *)arg,
                                   sizeof(struct lustre_swap_layouts)))
                if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
                        GOTO(out, rc = -EPERM);
  
-               if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
+               intent = lsl.sl_flags & INTENT_LAYOUTS_CLOSE;
+               if (intent) {
                        struct inode                    *inode2;
                        struct ll_inode_info            *lli;
                        struct obd_client_handle        *och = NULL;
  
-                       if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
-                               GOTO(out, rc = -EINVAL);
                        lli = ll_i2info(inode);
                        mutex_lock(&lli->lli_och_mutex);
                        if (fd->fd_lease_och != NULL) {
                        if (och == NULL)
                                GOTO(out, rc = -ENOLCK);
                        inode2 = file_inode(file2);
-                       rc = ll_swap_layouts_close(och, inode, inode2);
+                       rc = ll_swap_layouts_close(och, inode, inode2, intent);
                } else {
                        rc = ll_swap_layouts(file, file2, &lsl);
                }
@@@ -2885,7 -3129,7 +3145,7 @@@ out
                        RETURN(-EFAULT);
  
                idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
-               rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
+               rc = ll_ioc_data_version(inode, &idv);
  
                if (rc == 0 &&
                    copy_to_user((char __user *)arg, &idv, sizeof(idv)))
                OBD_FREE_PTR(hca);
                RETURN(rc);
        }
-       case LL_IOC_SET_LEASE: {
-               struct ll_inode_info *lli = ll_i2info(inode);
-               struct obd_client_handle *och = NULL;
-               bool lease_broken;
-               fmode_t fmode;
-               switch (arg) {
-               case LL_LEASE_WRLCK:
-                       if (!(file->f_mode & FMODE_WRITE))
-                               RETURN(-EPERM);
-                       fmode = FMODE_WRITE;
-                       break;
-               case LL_LEASE_RDLCK:
-                       if (!(file->f_mode & FMODE_READ))
-                               RETURN(-EPERM);
-                       fmode = FMODE_READ;
-                       break;
-               case LL_LEASE_UNLCK:
-                       mutex_lock(&lli->lli_och_mutex);
-                       if (fd->fd_lease_och != NULL) {
-                               och = fd->fd_lease_och;
-                               fd->fd_lease_och = NULL;
-                       }
-                       mutex_unlock(&lli->lli_och_mutex);
+       case LL_IOC_SET_LEASE_OLD: {
+               struct ll_ioc_lease ioc = { .lil_mode = (__u32)arg };
  
-                       if (och == NULL)
-                               RETURN(-ENOLCK);
-                       fmode = och->och_flags;
-                       rc = ll_lease_close(och, inode, &lease_broken);
-                       if (rc < 0)
-                               RETURN(rc);
-                       rc = ll_lease_och_release(inode, file);
-                       if (rc < 0)
-                               RETURN(rc);
-                       if (lease_broken)
-                               fmode = 0;
-                       RETURN(ll_lease_type_from_fmode(fmode));
-               default:
-                       RETURN(-EINVAL);
-               }
-               CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
+               RETURN(ll_file_set_lease(file, &ioc, 0));
+       }
+       case LL_IOC_SET_LEASE: {
+               struct ll_ioc_lease ioc;
  
-               /* apply for lease */
-               och = ll_lease_open(inode, file, fmode, 0);
-               if (IS_ERR(och))
-                       RETURN(PTR_ERR(och));
+               if (copy_from_user(&ioc, (void __user *)arg, sizeof(ioc)))
+                       RETURN(-EFAULT);
  
-               rc = 0;
-               mutex_lock(&lli->lli_och_mutex);
-               if (fd->fd_lease_och == NULL) {
-                       fd->fd_lease_och = och;
-                       och = NULL;
-               }
-               mutex_unlock(&lli->lli_och_mutex);
-               if (och != NULL) {
-                       /* impossible now that only excl is supported for now */
-                       ll_lease_close(och, inode, &lease_broken);
-                       rc = -EBUSY;
-               }
-               RETURN(rc);
+               RETURN(ll_file_set_lease(file, &ioc, arg));
        }
        case LL_IOC_GET_LEASE: {
                struct ll_inode_info *lli = ll_i2info(inode);
@@@ -3173,6 -3364,15 +3380,15 @@@ out_ladvise
                OBD_FREE(k_ladvise_hdr, alloc_size);
                RETURN(rc);
        }
+       case LL_IOC_FLR_SET_MIRROR: {
+               /* mirror I/O must be direct to avoid polluting the page cache
+                * with stale data. */
+               if (!(file->f_flags & O_DIRECT))
+                       RETURN(-EINVAL);
+               fd->fd_designated_mirror = (__u32)arg;
+               RETURN(0);
+       }
        case LL_IOC_FSGETXATTR:
                RETURN(ll_ioctl_fsgetxattr(inode, cmd, arg));
        case LL_IOC_FSSETXATTR:
@@@ -4696,19 -4896,20 +4912,20 @@@ int ll_layout_refresh(struct inode *ino
   * Issue layout intent RPC indicating where in a file an IO is about to write.
   *
   * \param[in] inode   file inode.
-  * \param[in] start   start offset of fille in bytes where an IO is about to
-  *                    write.
-  * \param[in] end     exclusive end offset in bytes of the write range.
+  * \param[in] opc     layout intent opcode, stored in li_opc of the intent.
+  * \param[in] ext     write range: start offset in the file, in bytes, where
+  *                    an IO is about to write, and exclusive end offset in
+  *                    bytes.
   *
   * \retval 0  on success
   * \retval < 0        error code
   */
- int ll_layout_write_intent(struct inode *inode, __u64 start, __u64 end)
+ int ll_layout_write_intent(struct inode *inode, enum layout_intent_opc opc,
+                          struct lu_extent *ext)
  {
        struct layout_intent intent = {
-               .li_opc = LAYOUT_INTENT_WRITE,
-               .li_start = start,
-               .li_end = end,
+               .li_opc = opc,
+               .li_extent.e_start = ext->e_start,
+               .li_extent.e_end = ext->e_end,
        };
        int rc;
        ENTRY;
diff --combined lustre/lod/lod_object.c
@@@ -1047,7 -1047,7 +1047,7 @@@ static int lod_attr_get(const struct lu
  }
  
  int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo,
-                           struct thandle *th, lod_obj_stripe_cb_t cb,
+                           struct thandle *th,
                            struct lod_obj_stripe_cb_data *data)
  {
        struct lod_layout_component *lod_comp;
                if (lod_comp->llc_stripe == NULL)
                        continue;
  
+               /* has stripes but is not inited yet: this component has been
+                * declared to be created, but hasn't been created yet.
+                */
+               if (!lod_comp_inited(lod_comp))
+                       continue;
+               if (data->locd_comp_skip_cb &&
+                   data->locd_comp_skip_cb(env, lo, i, data))
+                       continue;
                LASSERT(lod_comp->llc_stripe_count > 0);
                for (j = 0; j < lod_comp->llc_stripe_count; j++) {
                        struct dt_object *dt = lod_comp->llc_stripe[j];
  
                        if (dt == NULL)
                                continue;
-                       rc = cb(env, lo, dt, th, j, data);
+                       rc = data->locd_stripe_cb(env, lo, dt, th, i, j, data);
                        if (rc != 0)
                                RETURN(rc);
                }
        RETURN(0);
  }
  
+ static bool lod_obj_attr_set_comp_skip_cb(const struct lu_env *env,
+               struct lod_object *lo, int comp_idx,
+               struct lod_obj_stripe_cb_data *data)
+ {     /* Component filter for attribute setting: returns true when the
+        * component at index comp_idx of lo->ldo_comp_entries must be
+        * skipped, false when it should be processed. Filtering only applies
+        * when data->locd_attr carries LA_LAYOUT_VERSION; otherwise false is
+        * returned immediately and nothing is logged. In all other cases the
+        * decision is logged at D_LAYOUT level before returning. */
+       struct lod_layout_component *lod_comp = &lo->ldo_comp_entries[comp_idx];
+       bool skipped = false;
+       if (!(data->locd_attr->la_valid & LA_LAYOUT_VERSION))
+               return skipped;
+       switch (lo->ldo_flr_state) {
+       case LCM_FL_WRITE_PENDING: {
+               int i;
+               /* skip stale components */
+               if (lod_comp->llc_flags & LCME_FL_STALE) {
+                       skipped = true;
+                       break;
+               }
+               /* skip valid and overlapping components, therefore any
+                * attempts to write overlapped components will never succeed
+                * because client will get EINPROGRESS.
+                * Concretely: this (non-stale) component is skipped as soon
+                * as its extent overlaps the extent of any other non-stale
+                * component of the same object. */
+               for (i = 0; i < lo->ldo_comp_cnt; i++) {
+                       if (i == comp_idx)
+                               continue;
+                       if (lo->ldo_comp_entries[i].llc_flags & LCME_FL_STALE)
+                               continue;
+                       if (lu_extent_is_overlapped(&lod_comp->llc_extent,
+                                       &lo->ldo_comp_entries[i].llc_extent)) {
+                               skipped = true;
+                               break;
+                       }
+               }
+               break;
+       }
+       default: /* unexpected FLR state: assert, then fall through */
+               LASSERTF(0, "impossible: %d\n", lo->ldo_flr_state);
+       case LCM_FL_SYNC_PENDING: /* no component is skipped in this state */
+               break;
+       }
+       CDEBUG(D_LAYOUT, DFID": %s to set component %x to version: %u\n",
+              PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
+              skipped ? "skipped" : "chose", lod_comp->llc_id,
+              data->locd_attr->la_layout_version);
+       return skipped;
+ }
  static inline int
  lod_obj_stripe_attr_set_cb(const struct lu_env *env, struct lod_object *lo,
                           struct dt_object *dt, struct thandle *th,
-                          int stripe_idx, struct lod_obj_stripe_cb_data *data)
+                          int comp_idx, int stripe_idx,
+                          struct lod_obj_stripe_cb_data *data)
  {     /* Per-stripe callback: in declare mode (data->locd_declare) it only
         * declares the attribute update on stripe object dt; otherwise it
         * applies data->locd_attr to dt. Returns the result of the
         * lod_sub_declare_attr_set()/lod_sub_attr_set() call. */
        if (data->locd_declare)
                return lod_sub_declare_attr_set(env, dt, data->locd_attr, th);
  
+       if (data->locd_attr->la_valid & LA_LAYOUT_VERSION) { /* debug trace only */
+               CDEBUG(D_LAYOUT, DFID": set layout version: %u, comp_idx: %d\n",
+                      PFID(lu_object_fid(&dt->do_lu)),
+                      data->locd_attr->la_layout_version, comp_idx);
+       }
        return lod_sub_attr_set(env, dt, data->locd_attr, th);
  }
  
@@@ -1120,7 -1189,7 +1189,7 @@@ static int lod_declare_attr_set(const s
         * speed up rename().
         */
        if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
-               if (!(attr->la_valid & (LA_UID | LA_GID | LA_PROJID)))
+               if (!(attr->la_valid & LA_REMOTE_ATTR_SET))
                        RETURN(rc);
  
                if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
                                RETURN(rc);
                }
        } else {
-               struct lod_obj_stripe_cb_data data;
+               struct lod_obj_stripe_cb_data data = { { 0 } };
  
                data.locd_attr = attr;
                data.locd_declare = true;
-               rc = lod_obj_for_each_stripe(env, lo, th,
-                               lod_obj_stripe_attr_set_cb, &data);
+               data.locd_stripe_cb = lod_obj_stripe_attr_set_cb;
+               rc = lod_obj_for_each_stripe(env, lo, th, &data);
        }
  
        if (rc)
@@@ -1217,7 -1286,7 +1286,7 @@@ static int lod_attr_set(const struct lu
                RETURN(rc);
  
        if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
-               if (!(attr->la_valid & (LA_UID | LA_GID | LA_PROJID)))
+               if (!(attr->la_valid & LA_REMOTE_ATTR_SET))
                        RETURN(rc);
  
                if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
                        RETURN(rc);
        }
  
+       /* FIXME: a tricky case in the code path of mdd_layout_change():
+        * the in-memory striping information has been freed in lod_xattr_set()
+        * due to layout change. It has to load stripe here again. It only
+        * changes flags of layout so declare_attr_set() is still accurate */
+       rc = lod_load_striping_locked(env, lo);
+       if (rc)
+               RETURN(rc);
        if (!lod_obj_is_striped(dt))
                RETURN(0);
  
                                break;
                }
        } else {
-               struct lod_obj_stripe_cb_data data;
+               struct lod_obj_stripe_cb_data data = { { 0 } };
  
                data.locd_attr = attr;
                data.locd_declare = false;
-               rc = lod_obj_for_each_stripe(env, lo, th,
-                               lod_obj_stripe_attr_set_cb, &data);
+               data.locd_comp_skip_cb = lod_obj_attr_set_comp_skip_cb;
+               data.locd_stripe_cb = lod_obj_stripe_attr_set_cb;
+               rc = lod_obj_for_each_stripe(env, lo, th, &data);
        }
  
        if (rc)
@@@ -1954,10 -2032,12 +2032,10 @@@ static int lod_declare_xattr_set_lmv(co
                                     struct thandle *th)
  {
        struct lod_object       *lo = lod_dt_obj(dt);
 -      struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
 -      struct lmv_user_md_v1   *lum;
 +      struct lmv_user_md_v1   *lum = lum_buf->lb_buf;
        int                     rc;
        ENTRY;
  
 -      lum = lum_buf->lb_buf;
        LASSERT(lum != NULL);
  
        CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
        if (le32_to_cpu(lum->lum_stripe_count) == 0)
                GOTO(out, rc = 0);
  
 -      rc = lod_verify_md_striping(lod, lum);
 -      if (rc != 0)
 -              GOTO(out, rc);
 -
        /* prepare dir striped objects */
        rc = lod_prep_md_striped_create(env, dt, attr, lum, dof, th);
        if (rc != 0) {
@@@ -2011,7 -2095,7 +2089,7 @@@ static int lod_dir_declare_xattr_set(co
                if (rc != 0)
                        RETURN(rc);
        } else if (strcmp(name, XATTR_NAME_LOV) == 0) {
-               rc = lod_verify_striping(d, buf, false, 0);
+               rc = lod_verify_striping(d, lo, buf, false);
                if (rc != 0)
                        RETURN(rc);
        }
@@@ -2051,7 -2135,7 +2129,7 @@@ static in
  lod_obj_stripe_replace_parent_fid_cb(const struct lu_env *env,
                                     struct lod_object *lo,
                                     struct dt_object *dt, struct thandle *th,
-                                    int stripe_idx,
+                                    int comp_idx, int stripe_idx,
                                     struct lod_obj_stripe_cb_data *data)
  {
        struct lod_thread_info *info = lod_env_info(env);
@@@ -2104,7 -2188,7 +2182,7 @@@ static int lod_replace_parent_fid(cons
        struct lod_thread_info  *info = lod_env_info(env);
        struct lu_buf *buf = &info->lti_buf;
        struct filter_fid *ff;
-       struct lod_obj_stripe_cb_data data;
+       struct lod_obj_stripe_cb_data data = { { 0 } };
        int rc;
        ENTRY;
  
        buf->lb_len = info->lti_ea_store_size;
  
        data.locd_declare = declare;
-       rc = lod_obj_for_each_stripe(env, lo, th,
-                                    lod_obj_stripe_replace_parent_fid_cb,
-                                    &data);
+       data.locd_stripe_cb = lod_obj_stripe_replace_parent_fid_cb;
+       rc = lod_obj_for_each_stripe(env, lo, th, &data);
  
        RETURN(rc);
  }
@@@ -2212,7 -2295,7 +2289,7 @@@ static int lod_declare_layout_add(cons
                                  struct thandle *th)
  {
        struct lod_thread_info  *info = lod_env_info(env);
-       struct lod_layout_component *comp_array, *lod_comp;
+       struct lod_layout_component *comp_array, *lod_comp, *old_array;
        struct lod_device       *d = lu2lod_dev(dt->do_lu.lo_dev);
        struct dt_object *next = dt_object_child(dt);
        struct lov_desc         *desc = &d->lod_desc;
        struct lov_user_md_v3   *v3;
        struct lov_comp_md_v1   *comp_v1 = buf->lb_buf;
        __u32   magic;
-       __u64   prev_end;
-       int     i, rc, array_cnt;
+       int     i, rc, array_cnt, old_array_cnt;
        ENTRY;
  
        LASSERT(lo->ldo_is_composite);
  
-       prev_end = lo->ldo_comp_entries[lo->ldo_comp_cnt - 1].llc_extent.e_end;
-       rc = lod_verify_striping(d, buf, false, prev_end);
+       if (lo->ldo_flr_state != LCM_FL_NOT_FLR)
+               RETURN(-EBUSY);
+       rc = lod_verify_striping(d, lo, buf, false);
        if (rc != 0)
                RETURN(rc);
  
                lod_comp->llc_extent.e_start = ext->e_start;
                lod_comp->llc_extent.e_end = ext->e_end;
                lod_comp->llc_stripe_offset = v1->lmm_stripe_offset;
+               lod_comp->llc_flags = comp_v1->lcm_entries[i].lcme_flags;
  
                lod_comp->llc_stripe_count = v1->lmm_stripe_count;
                if (!lod_comp->llc_stripe_count ||
                }
        }
  
-       OBD_FREE(lo->ldo_comp_entries, sizeof(*lod_comp) * lo->ldo_comp_cnt);
+       old_array = lo->ldo_comp_entries;
+       old_array_cnt = lo->ldo_comp_cnt;
        lo->ldo_comp_entries = comp_array;
        lo->ldo_comp_cnt = array_cnt;
        /* No need to increase layout generation here, it will be increased
         * later when generating component ID for the new components */
  
        info->lti_buf.lb_len = lod_comp_md_size(lo, false);
        rc = lod_sub_declare_xattr_set(env, next, &info->lti_buf,
                                              XATTR_NAME_LOV, 0, th);
-       if (rc)
+       if (rc) {
+               lo->ldo_comp_entries = old_array;
+               lo->ldo_comp_cnt = old_array_cnt;
                GOTO(error, rc);
+       }
+       OBD_FREE(old_array, sizeof(*lod_comp) * old_array_cnt);
+       LASSERT(lo->ldo_mirror_count == 1);
+       lo->ldo_mirrors[0].lme_end = array_cnt - 1;
  
        RETURN(0);
  
@@@ -2416,9 -2512,8 +2506,8 @@@ static int lod_declare_layout_del(cons
  
        LASSERT(lo->ldo_is_composite);
  
-       rc = lod_verify_striping(d, buf, false, 0);
-       if (rc != 0)
-               RETURN(rc);
+       if (lo->ldo_flr_state != LCM_FL_NOT_FLR)
+               RETURN(-EBUSY);
  
        magic = comp_v1->lcm_magic;
        if (magic == __swab32(LOV_USER_MAGIC_COMP_V1)) {
@@@ -2586,6 -2681,213 +2675,213 @@@ unlock
  }
  
  /**
+  * Convert a plain file lov_mds_md to a composite layout.
+  *
+  * \param[in,out] info        the thread info::lti_ea_store buffer contains little
+  *                    endian plain file layout
+  *
+  * \retval            0 on success, <0 on failure
+  */
+ static int lod_layout_convert(struct lod_thread_info *info)
+ {
+       struct lov_mds_md *lmm = info->lti_ea_store;
+       struct lov_mds_md *lmm_save;
+       struct lov_comp_md_v1 *lcm;
+       struct lov_comp_md_entry_v1 *lcme;
+       size_t size;
+       __u32 blob_size;
+       int rc = 0;
+       ENTRY;
+       /* realloc buffer to a composite layout which contains one component */
+       blob_size = lov_mds_md_size(le16_to_cpu(lmm->lmm_stripe_count),
+                                   le32_to_cpu(lmm->lmm_magic));
+       size = sizeof(*lcm) + sizeof(*lcme) + blob_size;
+       OBD_ALLOC_LARGE(lmm_save, blob_size);
+       if (!lmm_save)
+               GOTO(out, rc = -ENOMEM);
+       memcpy(lmm_save, lmm, blob_size);
+       if (info->lti_ea_store_size < size) {
+               rc = lod_ea_store_resize(info, size);
+               if (rc)
+                       GOTO(out, rc);
+       }
+       lcm = info->lti_ea_store;
+       lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1);
+       lcm->lcm_size = cpu_to_le32(size);
+       lcm->lcm_layout_gen = cpu_to_le32(le16_to_cpu(
+                                               lmm_save->lmm_layout_gen));
+       lcm->lcm_flags = cpu_to_le16(LCM_FL_NOT_FLR);
+       lcm->lcm_entry_count = cpu_to_le16(1);
+       lcm->lcm_mirror_count = 0;
+       lcme = &lcm->lcm_entries[0];
+       lcme->lcme_flags = cpu_to_le32(LCME_FL_INIT);
+       lcme->lcme_extent.e_start = 0;
+       lcme->lcme_extent.e_end = cpu_to_le64(OBD_OBJECT_EOF);
+       lcme->lcme_offset = cpu_to_le32(sizeof(*lcm) + sizeof(*lcme));
+       lcme->lcme_size = cpu_to_le32(blob_size);
+       memcpy((char *)lcm + lcme->lcme_offset, (char *)lmm_save, blob_size);
+       EXIT;
+ out:
+       if (lmm_save)
+               OBD_FREE_LARGE(lmm_save, blob_size);
+       return rc;
+ }
+ /**
+  * Merge layouts to form a mirrored file.
+  *
+  * The composite layout passed in \a mbuf is appended, as one additional
+  * mirror, to the layout currently stored in the LOV EA of \a dt. A plain
+  * (V1/V3) current layout is first converted to a one-component composite
+  * layout. The merged layout is parsed into the in-memory striping of the
+  * object and the replacement of the LOV xattr is declared in \a th.
+  *
+  * \param[in] env     execution environment
+  * \param[in] dt      object whose layout receives the new mirror
+  * \param[in] mbuf    little endian LOV_MAGIC_COMP_V1 layout to merge in; it
+  *                    must not be mirrored itself
+  * \param[in] th      transaction handle used for the declaration
+  *
+  * \retval            0 on success
+  * \retval            -EINVAL if \a mbuf is too short, or either layout has an
+  *                    unsupported magic
+  * \retval            -EBUSY if the layout in \a mbuf already has mirrors
+  * \retval            -ENODATA if \a dt has no LOV EA
+  * \retval            -ERANGE if LUSTRE_MIRROR_COUNT_MAX would be exceeded
+  * \retval            -ENOMEM if the merged layout cannot be allocated
+  * \retval            <0 other error from the helpers called
+  */
+ static int lod_declare_layout_merge(const struct lu_env *env,
+               struct dt_object *dt, const struct lu_buf *mbuf,
+               struct thandle *th)
+ {
+       struct lod_thread_info  *info = lod_env_info(env);
+       struct lu_buf           *buf = &info->lti_buf;
+       struct lod_object       *lo = lod_dt_obj(dt);
+       struct lov_comp_md_v1   *lcm;
+       struct lov_comp_md_v1   *cur_lcm;
+       struct lov_comp_md_v1   *merge_lcm;
+       struct lov_comp_md_entry_v1     *lcme;
+       size_t size = 0;
+       size_t offset;
+       __u16 cur_entry_count;
+       __u16 merge_entry_count;
+       __u32 id = 0;
+       __u16 mirror_id = 0;
+       __u32 mirror_count;
+       int     rc, i;
+       ENTRY;
+       merge_lcm = mbuf->lb_buf;
+       if (mbuf->lb_len < sizeof(*merge_lcm))
+               RETURN(-EINVAL);
+       /* must be an existing layout from disk */
+       if (le32_to_cpu(merge_lcm->lcm_magic) != LOV_MAGIC_COMP_V1)
+               RETURN(-EINVAL);
+       merge_entry_count = le16_to_cpu(merge_lcm->lcm_entry_count);
+       /* do not allow to merge two mirrored files */
+       if (le16_to_cpu(merge_lcm->lcm_mirror_count))
+               RETURN(-EBUSY);
+       /* verify the target buffer */
+       rc = lod_get_lov_ea(env, lo);
+       if (rc <= 0)
+               RETURN(rc ? : -ENODATA);
+       cur_lcm = info->lti_ea_store;
+       switch (le32_to_cpu(cur_lcm->lcm_magic)) {
+       case LOV_MAGIC_V1:
+       case LOV_MAGIC_V3:
+               rc = lod_layout_convert(info);
+               break;
+       case LOV_MAGIC_COMP_V1:
+               rc = 0;
+               break;
+       default:
+               rc = -EINVAL;
+       }
+       if (rc)
+               RETURN(rc);
+       /* info->lti_ea_store could be reallocated in lod_layout_convert() */
+       cur_lcm = info->lti_ea_store;
+       cur_entry_count = le16_to_cpu(cur_lcm->lcm_entry_count);
+       /* 'lcm_mirror_count + 1' is the current # of mirrors the file has */
+       mirror_count = le16_to_cpu(cur_lcm->lcm_mirror_count) + 1;
+       if (mirror_count + 1 > LUSTRE_MIRROR_COUNT_MAX)
+               RETURN(-ERANGE);
+       /* size of new layout: both layouts, but only one composite header */
+       size = le32_to_cpu(cur_lcm->lcm_size) +
+              le32_to_cpu(merge_lcm->lcm_size) - sizeof(*cur_lcm);
+       memset(buf, 0, sizeof(*buf));
+       lu_buf_alloc(buf, size);
+       if (buf->lb_buf == NULL)
+               RETURN(-ENOMEM);
+       lcm = buf->lb_buf;
+       /* start from the current header and its entry table */
+       memcpy(lcm, cur_lcm, sizeof(*lcm) + cur_entry_count * sizeof(*lcme));
+       /* component blobs are laid out after the combined entry table */
+       offset = sizeof(*lcm) +
+                sizeof(*lcme) * (cur_entry_count + merge_entry_count);
+       for (i = 0; i < cur_entry_count; i++) {
+               struct lov_comp_md_entry_v1 *cur_lcme;
+               lcme = &lcm->lcm_entries[i];
+               cur_lcme = &cur_lcm->lcm_entries[i];
+               lcme->lcme_offset = cpu_to_le32(offset);
+               memcpy((char *)lcm + offset,
+                      (char *)cur_lcm + le32_to_cpu(cur_lcme->lcme_offset),
+                      le32_to_cpu(lcme->lcme_size));
+               offset += le32_to_cpu(lcme->lcme_size);
+               if (mirror_count == 1) {
+                       /* new mirrored file, create new mirror ID */
+                       id = pflr_id(1, i + 1);
+                       lcme->lcme_id = cpu_to_le32(id);
+               }
+               /* remember the largest component ID of the current layout */
+               id = MAX(le32_to_cpu(lcme->lcme_id), id);
+       }
+       /* the merged components form the mirror following the largest one */
+       mirror_id = mirror_id_of(id) + 1;
+       for (i = 0; i < merge_entry_count; i++) {
+               struct lov_comp_md_entry_v1 *merge_lcme;
+               merge_lcme = &merge_lcm->lcm_entries[i];
+               lcme = &lcm->lcm_entries[cur_entry_count + i];
+               *lcme = *merge_lcme;
+               lcme->lcme_offset = cpu_to_le32(offset);
+               id = pflr_id(mirror_id, i + 1);
+               lcme->lcme_id = cpu_to_le32(id);
+               memcpy((char *)lcm + offset,
+                      (char *)merge_lcm + le32_to_cpu(merge_lcme->lcme_offset),
+                      le32_to_cpu(lcme->lcme_size));
+               offset += le32_to_cpu(lcme->lcme_size);
+       }
+       /* fixup layout information */
+       lod_obj_inc_layout_gen(lo);
+       lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen);
+       lcm->lcm_size = cpu_to_le32(size);
+       lcm->lcm_entry_count = cpu_to_le16(cur_entry_count + merge_entry_count);
+       lcm->lcm_mirror_count = cpu_to_le16(mirror_count);
+       /* lcm_flags is a 16-bit little endian field (it is read with
+        * le16_to_cpu() right here), so it must be stored with cpu_to_le16();
+        * cpu_to_le32() would be truncated to a wrong value on big-endian
+        * hosts */
+       if ((le16_to_cpu(lcm->lcm_flags) & LCM_FL_FLR_MASK) == LCM_FL_NOT_FLR)
+               lcm->lcm_flags = cpu_to_le16(LCM_FL_RDONLY);
+       LASSERT(dt_write_locked(env, dt_object_child(dt)));
+       /* rebuild the in-memory striping from the merged layout */
+       lod_object_free_striping(env, lo);
+       rc = lod_parse_striping(env, lo, buf);
+       if (rc)
+               GOTO(out, rc);
+       rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), buf,
+                                       XATTR_NAME_LOV, LU_XATTR_REPLACE, th);
+ out:
+       lu_buf_free(buf);
+       RETURN(rc);
+ }
+ /**
   * Implementation of dt_object_operations::do_declare_xattr_set.
   *
   * \see dt_object_operations::do_declare_xattr_set() in the API description
@@@ -2608,7 -2910,8 +2904,8 @@@ static int lod_declare_xattr_set(const 
        ENTRY;
  
        mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
-       if ((S_ISREG(mode) || mode == 0) && !(fl & LU_XATTR_REPLACE) &&
+       if ((S_ISREG(mode) || mode == 0) &&
+           !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE)) &&
            (strcmp(name, XATTR_NAME_LOV) == 0 ||
             strcmp(name, XATTR_LUSTRE_LOV) == 0)) {
                /*
                        attr->la_mode = S_IFREG;
                }
                rc = lod_declare_striped_create(env, dt, attr, buf, th);
+       } else if (fl & LU_XATTR_MERGE) {
+               LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 ||
+                       strcmp(name, XATTR_LUSTRE_LOV) == 0);
+               rc = lod_declare_layout_merge(env, dt, buf, th);
        } else if (S_ISREG(mode) &&
                   strlen(name) > strlen(XATTR_LUSTRE_LOV) + 1 &&
                   strncmp(name, XATTR_LUSTRE_LOV,
@@@ -3313,6 -3620,9 +3614,9 @@@ static int lod_layout_del(const struct 
                         sizeof(*comp_array) * lo->ldo_comp_cnt);
                lo->ldo_comp_entries = comp_array;
                lo->ldo_comp_cnt = left;
+               LASSERT(lo->ldo_mirror_count == 1);
+               lo->ldo_mirrors[0].lme_end = left - 1;
                lod_obj_inc_layout_gen(lo);
        } else {
                lod_free_comp_entries(lo);
@@@ -3560,6 -3870,7 +3864,7 @@@ static int lod_get_default_lov_striping
        struct lov_user_md_v3 *v3 = NULL;
        struct lov_comp_md_v1 *comp_v1 = NULL;
        __u16   comp_cnt;
+       __u16   mirror_cnt;
        bool    composite;
        int     rc, i;
        ENTRY;
                comp_cnt = comp_v1->lcm_entry_count;
                if (comp_cnt == 0)
                        RETURN(-EINVAL);
+               mirror_cnt = comp_v1->lcm_mirror_count + 1;
                composite = true;
        } else {
                comp_cnt = 1;
+               mirror_cnt = 0;
                composite = false;
        }
  
                RETURN(rc);
  
        lds->lds_def_comp_cnt = comp_cnt;
-       lds->lds_def_striping_is_composite = composite ? 1 : 0;
+       lds->lds_def_striping_is_composite = composite;
+       lds->lds_def_mirror_cnt = mirror_cnt;
  
        for (i = 0; i < comp_cnt; i++) {
                struct lod_layout_component *lod_comp;
@@@ -3741,11 -4055,14 +4049,14 @@@ static void lod_striping_from_default(s
        int i, rc;
  
        if (lds->lds_def_striping_set && S_ISREG(mode)) {
-               rc = lod_alloc_comp_entries(lo, lds->lds_def_comp_cnt);
+               rc = lod_alloc_comp_entries(lo, lds->lds_def_mirror_cnt,
+                                           lds->lds_def_comp_cnt);
                if (rc != 0)
                        return;
  
                lo->ldo_is_composite = lds->lds_def_striping_is_composite;
+               if (lds->lds_def_mirror_cnt > 1)
+                       lo->ldo_flr_state = LCM_FL_RDONLY;
  
                for (i = 0; i < lo->ldo_comp_cnt; i++) {
                        struct lod_layout_component *obj_comp =
@@@ -3876,8 -4193,6 +4187,8 @@@ static void lod_ah_init(const struct lu
                nextc->do_ops->do_ah_init(env, ah, nextp, nextc, child_mode);
  
        if (S_ISDIR(child_mode)) {
 +              const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
 +
                /* other default values are 0 */
                lc->ldo_dir_stripe_offset = -1;
  
                        lc->ldo_def_striping = lds;
  
                /* It should always honour the specified stripes */
 +              /* Note: an old client (< 2.7) might also do lfs mkdir, whose
 +               * EA will have the old magic. In this case, we should ignore
 +               * the stripe count and try to create the dir by default stripe.
 +               */
                if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0 &&
 -                  lod_verify_md_striping(d, ah->dah_eadata) == 0) {
 -                      const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
 -
 +                  le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC) {
                        lc->ldo_dir_stripe_count =
                                le32_to_cpu(lum1->lum_stripe_count);
                        lc->ldo_dir_stripe_offset =
@@@ -4004,9 -4317,8 +4315,8 @@@ out
         * in config log, use them.
         */
        if (lod_need_inherit_more(lc, false)) {
                if (lc->ldo_comp_cnt == 0) {
-                       rc = lod_alloc_comp_entries(lc, 1);
+                       rc = lod_alloc_comp_entries(lc, 0, 1);
                        if (rc)
                                /* fail to allocate memory, will create a
                                 * non-striped file. */
@@@ -4055,6 -4367,7 +4365,7 @@@ static int lod_declare_init_size(const 
        struct lu_attr  *attr = &lod_env_info(env)->lti_attr;
        uint64_t        size, offs;
        int     i, rc, stripe, stripe_count = 0, stripe_size = 0;
+       struct lu_extent size_ext;
        ENTRY;
  
        if (!lod_obj_is_striped(dt))
        if (size == 0)
                RETURN(0);
  
+       size_ext = (typeof(size_ext)){ .e_start = size - 1, .e_end = size };
        for (i = 0; i < lo->ldo_comp_cnt; i++) {
                struct lod_layout_component *lod_comp;
                struct lu_extent *extent;
                        continue;
  
                extent = &lod_comp->llc_extent;
-               CDEBUG(D_INFO, "%lld [%lld, %lld)\n",
-                      size, extent->e_start, extent->e_end);
+               CDEBUG(D_INFO, "%lld "DEXT"\n", size, PEXT(extent));
                if (!lo->ldo_is_composite ||
-                   (size >= extent->e_start && size < extent->e_end)) {
+                   lu_extent_is_overlapped(extent, &size_ext)) {
                        objects = lod_comp->llc_stripe;
                        stripe_count = lod_comp->llc_stripe_count;
                        stripe_size = lod_comp->llc_stripe_size;
-                       break;
-               }
-       }
  
-       if (stripe_count == 0)
-               RETURN(0);
-       LASSERT(objects != NULL && stripe_size != 0);
+                       /* next mirror */
+                       if (stripe_count == 0)
+                               continue;
  
-       /* ll_do_div64(a, b) returns a % b, and a = a / b */
-       ll_do_div64(size, (__u64)stripe_size);
-       stripe = ll_do_div64(size, (__u64)stripe_count);
-       LASSERT(objects[stripe] != NULL);
+                       LASSERT(objects != NULL && stripe_size != 0);
+                       /* ll_do_div64(a, b) returns a % b, and a = a / b */
+                       ll_do_div64(size, (__u64)stripe_size);
+                       stripe = ll_do_div64(size, (__u64)stripe_count);
+                       LASSERT(objects[stripe] != NULL);
  
-       size = size * stripe_size;
-       offs = attr->la_size;
-       size += ll_do_div64(offs, stripe_size);
+                       size = size * stripe_size;
+                       offs = attr->la_size;
+                       size += ll_do_div64(offs, stripe_size);
  
-       attr->la_valid = LA_SIZE;
-       attr->la_size = size;
+                       attr->la_valid = LA_SIZE;
+                       attr->la_size = size;
  
-       rc = lod_sub_declare_attr_set(env, objects[stripe], attr, th);
+                       rc = lod_sub_declare_attr_set(env, objects[stripe],
+                                                     attr, th);
+               }
+       }
  
        RETURN(rc);
  }
  }
  
  /**
+  * Generate component ID for a newly created component.
+  *
+  * The layout generation of \a lo is increased first. While it does not
+  * exceed SEQ_ID_MAX it is used directly as the sequence part of the ID;
+  * otherwise candidate IDs are compared against the llc_id of every
+  * component of \a lo until an unused one is found.
+  *
+  * \param[in] lo              LOD object
+  * \param[in] mirror_id       mirror ID, combined with the sequence part of
+  *                            the ID by pflr_id()
+  * \param[in] comp_idx                index of ldo_comp_entries
+  *
+  * \retval                    component ID on success
+  * \retval                    LCME_ID_INVAL on failure
+  */
+ static __u32 lod_gen_component_id(struct lod_object *lo,
+                                 int mirror_id, int comp_idx)
+ {
+       struct lod_layout_component *lod_comp;
+       __u32   id, start, end;
+       int     i;
+       LASSERT(lo->ldo_comp_entries[comp_idx].llc_id == LCME_ID_INVAL);
+       lod_obj_inc_layout_gen(lo);
+       id = lo->ldo_layout_gen;
+       if (likely(id <= SEQ_ID_MAX))
+               RETURN(pflr_id(mirror_id, id & SEQ_ID_MASK));
+       /* Layout generation wraps, need to check collisions. */
+       start = id & SEQ_ID_MASK;
+       end = SEQ_ID_MAX;
+ again:
+       for (id = start; id <= end; id++) {
+               for (i = 0; i < lo->ldo_comp_cnt; i++) {
+                       lod_comp = &lo->ldo_comp_entries[i];
+                       if (pflr_id(mirror_id, id) == lod_comp->llc_id)
+                               break;
+               }
+               /* Found the unused ID */
+               if (i == lo->ldo_comp_cnt)
+                       RETURN(pflr_id(mirror_id, id));
+       }
+       if (end == LCME_ID_MAX) { /* NOTE(review): 'end' was set to SEQ_ID_MAX
+                * above, so this second pass over the low IDs only runs if
+                * SEQ_ID_MAX equals LCME_ID_MAX - confirm this is intended */
+               start = 1;
+               end = min(lo->ldo_layout_gen & LCME_ID_MASK,
+                         (__u32)(LCME_ID_MAX - 1));
+               goto again;
+       }
+       RETURN(LCME_ID_INVAL);
+ }
+ /**
   * Creation of a striped regular object.
   *
   * The function is called to create the stripe objects for a regular
@@@ -4317,15 -4677,28 +4675,28 @@@ int lod_striped_create(const struct lu_
  {
        struct lod_layout_component     *lod_comp;
        struct lod_object       *lo = lod_dt_obj(dt);
+       __u16   mirror_id;
        int     rc = 0, i, j;
        ENTRY;
  
        LASSERT(lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL);
  
+       mirror_id = lo->ldo_mirror_count > 1 ? 1 : 0;
        /* create all underlying objects */
        for (i = 0; i < lo->ldo_comp_cnt; i++) {
                lod_comp = &lo->ldo_comp_entries[i];
  
+               if (lod_comp->llc_extent.e_start == 0 && i > 0) /* new mirror */
+                       ++mirror_id;
+               if (lod_comp->llc_id == LCME_ID_INVAL) {
+                       lod_comp->llc_id = lod_gen_component_id(lo,
+                                                               mirror_id, i);
+                       if (lod_comp->llc_id == LCME_ID_INVAL)
+                               GOTO(out, rc = -ERANGE);
+               }
                if (lod_comp_inited(lod_comp))
                        continue;
  
                        LASSERT(object != NULL);
                        rc = lod_sub_create(env, object, attr, NULL, dof, th);
                        if (rc)
-                               break;
+                               GOTO(out, rc);
                }
                lod_comp_set_init(lod_comp);
        }
  
-       if (rc == 0)
-               rc = lod_generate_and_set_lovea(env, lo, th);
+       rc = lod_fill_mirrors(lo);
+       if (rc)
+               GOTO(out, rc);
  
-       if (rc == 0)
-               lo->ldo_comp_cached = 1;
-       else
-               lod_object_free_striping(env, lo);
+       rc = lod_generate_and_set_lovea(env, lo, th);
+       if (rc)
+               GOTO(out, rc);
+       lo->ldo_comp_cached = 1;
+       RETURN(0);
  
+ out:
+       lod_object_free_striping(env, lo);
        RETURN(rc);
  }
  
@@@ -4393,7 -4771,8 +4769,8 @@@ static int lod_create(const struct lu_e
  static inline int
  lod_obj_stripe_destroy_cb(const struct lu_env *env, struct lod_object *lo,
                          struct dt_object *dt, struct thandle *th,
-                         int stripe_idx, struct lod_obj_stripe_cb_data *data)
+                         int comp_idx, int stripe_idx,
+                         struct lod_obj_stripe_cb_data *data)
  {
        if (data->locd_declare)
                return lod_sub_declare_destroy(env, dt, th);
@@@ -4485,11 -4864,11 +4862,11 @@@ static int lod_declare_destroy(const st
                                break;
                }
        } else {
-               struct lod_obj_stripe_cb_data data;
+               struct lod_obj_stripe_cb_data data = { { 0 } };
  
                data.locd_declare = true;
-               rc = lod_obj_for_each_stripe(env, lo, th,
-                               lod_obj_stripe_destroy_cb, &data);
+               data.locd_stripe_cb = lod_obj_stripe_destroy_cb;
+               rc = lod_obj_for_each_stripe(env, lo, th, &data);
        }
  
        RETURN(rc);
@@@ -4575,11 -4954,11 +4952,11 @@@ static int lod_destroy(const struct lu_
                        }
                }
        } else {
-               struct lod_obj_stripe_cb_data data;
+               struct lod_obj_stripe_cb_data data = { { 0 } };
  
                data.locd_declare = false;
-               rc = lod_obj_for_each_stripe(env, lo, th,
-                               lod_obj_stripe_destroy_cb, &data);
+               data.locd_stripe_cb = lod_obj_stripe_destroy_cb;
+               rc = lod_obj_for_each_stripe(env, lo, th, &data);
        }
  
        RETURN(rc);
@@@ -4837,29 -5216,78 +5214,78 @@@ static int lod_invalidate(const struct 
        return dt_invalidate(env, dt_object_child(dt));
  }
  
- static int lod_declare_layout_change(const struct lu_env *env,
-                                    struct dt_object *dt,
-                                    struct layout_intent *layout,
-                                    const struct lu_buf *buf,
-                                    struct thandle *th)
+ static int lod_layout_data_init(struct lod_thread_info *info, __u32 comp_cnt)
  {
-       struct lod_thread_info  *info = lod_env_info(env);
-       struct lod_object *lo = lod_dt_obj(dt);
-       struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
-       struct dt_object *next = dt_object_child(dt);
+       ENTRY;
+       /* Prepare the per-thread scratch state used by a layout change:
+        * clear memory region that will be used for layout change
+        * (lti_layout_attr), reset the number of recorded component
+        * indices, and make sure lti_comp_idx can hold at least
+        * \a comp_cnt entries.
+        *
+        * Returns 0 on success, -ENOMEM if the index array cannot be
+        * allocated. */
+       memset(&info->lti_layout_attr, 0, sizeof(struct lu_attr));
+       info->lti_count = 0;
+       if (info->lti_comp_size >= comp_cnt) /* current array is big enough */
+               RETURN(0);
+       if (info->lti_comp_size > 0) { /* too small: release it first */
+               OBD_FREE(info->lti_comp_idx,
+                        info->lti_comp_size * sizeof(__u32));
+               info->lti_comp_size = 0;
+       }
+       OBD_ALLOC(info->lti_comp_idx, comp_cnt * sizeof(__u32));
+       if (!info->lti_comp_idx) /* lti_comp_size stays 0 in this case */
+               RETURN(-ENOMEM);
+       info->lti_comp_size = comp_cnt;
+       RETURN(0);
+ }
+ static int lod_declare_instantiate_components(const struct lu_env *env,
+               struct lod_object *lo, struct thandle *th)
+ {     /* Call lod_qos_prep_create() for every component index recorded in
+        * info->lti_comp_idx[0 .. lti_count) and, if all of them succeed,
+        * declare the XATTR_NAME_LOV update on the child object.
+        * Returns 0 on success or the first non-zero rc of a callee. */
+       struct lod_thread_info *info = lod_env_info(env);
        struct ost_pool *inuse = &info->lti_inuse_osts;
+       int i;
+       int rc = 0;
+       ENTRY;
+       LASSERT(info->lti_count < lo->ldo_comp_cnt);
+       if (info->lti_count > 0) {
+               /* Prepare inuse array for composite file; only needed
+                * when at least one component index was recorded */
+               rc = lod_prepare_inuse(env, lo);
+               if (rc)
+                       RETURN(rc);
+       }
+       for (i = 0; i < info->lti_count; i++) {
+               rc = lod_qos_prep_create(env, lo, NULL, th,
+                                        info->lti_comp_idx[i], inuse);
+               if (rc) /* stop at the first failure, rc is returned below */
+                       break;
+       }
+       if (!rc) { /* size the buffer for the layout, then declare the xattr */
+               info->lti_buf.lb_len = lod_comp_md_size(lo, false);
+               rc = lod_sub_declare_xattr_set(env, lod_object_child(lo),
+                               &info->lti_buf, XATTR_NAME_LOV, 0, th);
+       }
+       RETURN(rc);
+ }
+ static int lod_declare_update_plain(const struct lu_env *env,
+               struct lod_object *lo, struct layout_intent *layout,
+               const struct lu_buf *buf, struct thandle *th)
+ {
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lod_device *d = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
        struct lod_layout_component *lod_comp;
        struct lov_comp_md_v1 *comp_v1 = NULL;
        bool replay = false;
        int i, rc;
        ENTRY;
  
-       if (!S_ISREG(dt->do_lu.lo_header->loh_attr) || !dt_object_exists(dt) ||
-           dt_object_remote(next))
-               RETURN(-EINVAL);
+       LASSERT(lo->ldo_flr_state == LCM_FL_NOT_FLR);
  
-       dt_write_lock(env, next, 0);
        /*
         * In case the client is passing lovea, which only happens during
         * the replay of layout intent write RPC for now, we may need to
                if (rc <= 0)
                        GOTO(out, rc);
                /* old on-disk EA is stored in info->lti_buf */
-               comp_v1 = (struct lov_comp_md_v1 *)&info->lti_buf.lb_buf;
+               comp_v1 = (struct lov_comp_md_v1 *)info->lti_buf.lb_buf;
                replay = true;
        } else {
                /* non replay path */
                rc = lod_load_striping_locked(env, lo);
                if (rc)
                        GOTO(out, rc);
+       }
  
-               /* Prepare inuse array for composite file */
-               rc = lod_prepare_inuse(env, lo);
-               if (rc)
-                       GOTO(out, rc);
+       if (layout->li_opc == LAYOUT_INTENT_TRUNC) {
+               /**
+                * trunc passes [size, eof) in the intent extent, while
+                * the components to be instantiated cover [0, size).
+                */
+               layout->li_extent.e_end = layout->li_extent.e_start;
+               layout->li_extent.e_start = 0;
        }
  
        /* Make sure defined layout covers the requested write range. */
        lod_comp = &lo->ldo_comp_entries[lo->ldo_comp_cnt - 1];
        if (lo->ldo_comp_cnt > 1 &&
            lod_comp->llc_extent.e_end != OBD_OBJECT_EOF &&
-           lod_comp->llc_extent.e_end < layout->li_end) {
+           lod_comp->llc_extent.e_end < layout->li_extent.e_end) {
                CDEBUG(replay ? D_ERROR : D_LAYOUT,
                       "%s: the defined layout [0, %#llx) does not covers "
-                      "the write range [%#llx, %#llx).\n",
+                      "the write range "DEXT"\n",
                       lod2obd(d)->obd_name, lod_comp->llc_extent.e_end,
-                      layout->li_start, layout->li_end);
+                      PEXT(&layout->li_extent));
                GOTO(out, rc = -EINVAL);
        }
  
+       CDEBUG(D_LAYOUT, "%s: "DFID": instantiate components "DEXT"\n",
+              lod2obd(d)->obd_name, PFID(lod_object_fid(lo)),
+              PEXT(&layout->li_extent));
        /*
         * Iterate lo->ldo_comp_entries, find the components whose extents are
         * under the write range and not yet instantiated.
        for (i = 0; i < lo->ldo_comp_cnt; i++) {
                lod_comp = &lo->ldo_comp_entries[i];
  
-               if (lod_comp->llc_extent.e_start >= layout->li_end)
+               if (lod_comp->llc_extent.e_start >= layout->li_extent.e_end)
                        break;
  
                if (!replay) {
                if (lod_comp->llc_pattern & LOV_PATTERN_F_RELEASED)
                        GOTO(out, rc = -EINVAL);
  
-               need_create = true;
+               LASSERT(info->lti_comp_idx != NULL);
+               info->lti_comp_idx[info->lti_count++] = i;
+       }
  
-               rc = lod_qos_prep_create(env, lo, NULL, th, i, inuse);
-               if (rc)
+       if (info->lti_count == 0)
+               RETURN(-EALREADY);
+       lod_obj_inc_layout_gen(lo);
+       rc = lod_declare_instantiate_components(env, lo, th);
+ out:
+       if (rc)
+               lod_object_free_striping(env, lo);
+       RETURN(rc);
+ }
+ /**
+  * Iterate \a comp over the components of mirror \a mirror_idx of \a lo,
+  * from ldo_mirrors[mirror_idx].lme_start to .lme_end inclusive.
+  *
+  * Every argument is expanded more than once, so callers must pass
+  * expressions without side effects.
+  */
+ #define lod_foreach_mirror_comp(comp, lo, mirror_idx)                      \
+ for ((comp) = &(lo)->ldo_comp_entries[                                     \
+               (lo)->ldo_mirrors[(mirror_idx)].lme_start];                  \
+      (comp) <= &(lo)->ldo_comp_entries[                                    \
+               (lo)->ldo_mirrors[(mirror_idx)].lme_end];                    \
+      (comp)++)
+ static inline int lod_comp_index(struct lod_object *lo,
+                                struct lod_layout_component *lod_comp)
+ {     /* Return the index of \a lod_comp within lo->ldo_comp_entries;
+        * asserts that the pointer lies inside that array. */
+       LASSERT(lod_comp >= lo->ldo_comp_entries &&
+               lod_comp <= &lo->ldo_comp_entries[lo->ldo_comp_cnt - 1]);
+       return lod_comp - lo->ldo_comp_entries; /* element offset */
+ }
+ /**
+  * Stale other mirrors by writing extent.
+  *
+  * For every component of the primary mirror that overlaps \a extent, set
+  * LCME_FL_STALE on each component of every other mirror that overlaps
+  * that primary component, and set lme_stale on the owning mirror entry.
+  * Only the in-memory layout of \a lo is modified here.
+  *
+  * \param[in,out] lo          LOD object
+  * \param[in] primary         index in ldo_mirrors of the mirror written to
+  * \param[in] extent          extent being written
+  */
+ static void lod_stale_components(struct lod_object *lo, int primary,
+                                struct lu_extent *extent)
+ {
+       struct lod_layout_component *pri_comp, *lod_comp;
+       int i;
+       /* The writing extent decides which components in the primary
+        * are affected... */
+       CDEBUG(D_LAYOUT, "primary mirror %d, "DEXT"\n", primary, PEXT(extent));
+       lod_foreach_mirror_comp(pri_comp, lo, primary) {
+               if (!lu_extent_is_overlapped(extent, &pri_comp->llc_extent))
+                       continue;
+               CDEBUG(D_LAYOUT, "primary comp %u "DEXT"\n",
+                      lod_comp_index(lo, pri_comp),
+                      PEXT(&pri_comp->llc_extent));
+               for (i = 0; i < lo->ldo_mirror_count; i++) {
+                       if (i == primary)
+                               continue;
+                       /* ... and then stale other components that are
+                        * overlapping with primary components; the overlap
+                        * is tested against the whole primary component,
+                        * not just the written extent */
+                       lod_foreach_mirror_comp(lod_comp, lo, i) {
+                               if (!lu_extent_is_overlapped(
+                                                       &pri_comp->llc_extent,
+                                                       &lod_comp->llc_extent))
+                                       continue;
+                               CDEBUG(D_LAYOUT, "stale: %u / %u\n",
+                                     i, lod_comp_index(lo, lod_comp));
+                               lod_comp->llc_flags |= LCME_FL_STALE;
+                               lo->ldo_mirrors[i].lme_stale = 1;
+                       }
+               }
+       }
+ }
+ /**
+  * Declare the layout update triggered by a write intent on a mirrored
+  * file that is in LCM_FL_RDONLY state.
+  *
+  * Picks a non-stale mirror as the primary, marks the overlapping
+  * components of the other mirrors stale, records the not yet initialized
+  * components of the primary that the write needs, moves the cached layout
+  * to LCM_FL_WRITE_PENDING and declares both the instantiation and the
+  * layout-version attribute update.
+  *
+  * \param[in] env             execution environment
+  * \param[in,out] lo          LOD object
+  * \param[in] mlc             layout change descriptor, MD_LAYOUT_WRITE
+  * \param[in] th              transaction handle
+  *
+  * \retval 0                  on success
+  * \retval -ENODATA           if no non-stale mirror can be picked
+  * \retval negative           error returned by one of the declare helpers
+  */
+ static int lod_declare_update_rdonly(const struct lu_env *env,
+               struct lod_object *lo, struct md_layout_change *mlc,
+               struct thandle *th)
+ {
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lu_attr *layout_attr = &info->lti_layout_attr;
+       struct lod_layout_component *lod_comp;
+       struct layout_intent *layout = mlc->mlc_intent;
+       struct lu_extent extent = layout->li_extent;
+       unsigned int seq = 0;
+       int picked;
+       int i;
+       int rc;
+       ENTRY;
+       LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE);
+       LASSERT(lo->ldo_flr_state == LCM_FL_RDONLY);
+       LASSERT(lo->ldo_mirror_count > 0);
+       CDEBUG(D_LAYOUT, DFID": trying to write :"DEXT"\n",
+              PFID(lod_object_fid(lo)), PEXT(&extent));
+       /* fail point: start the search below at a random mirror */
+       if (OBD_FAIL_CHECK(OBD_FAIL_FLR_RANDOM_PICK_MIRROR)) {
+               get_random_bytes(&seq, sizeof(seq));
+               seq %= lo->ldo_mirror_count;
+       }
+       /**
+        * Pick a mirror as the primary.
+        * Now it only picks the first mirror, this algo can be
+        * revised later after knowing the topology of cluster or
+        * the availability of OSTs.
+        */
+       for (picked = -1, i = 0; i < lo->ldo_mirror_count; i++) {
+               int index = (i + seq) % lo->ldo_mirror_count;
+               if (!lo->ldo_mirrors[index].lme_stale) {
+                       picked = index;
                        break;
+               }
        }
+       if (picked < 0) /* failed to pick a primary */
+               RETURN(-ENODATA);
  
-       if (need_create)
-               lod_obj_inc_layout_gen(lo);
-       else
-               GOTO(unlock, rc = -EALREADY);
+       CDEBUG(D_LAYOUT, DFID": picked mirror %u as primary\n",
+              PFID(lod_object_fid(lo)), lo->ldo_mirrors[picked].lme_id);
  
-       if (!rc) {
-               info->lti_buf.lb_len = lod_comp_md_size(lo, false);
-               rc = lod_sub_declare_xattr_set(env, next, &info->lti_buf,
-                                              XATTR_NAME_LOV, 0, th);
+       /* stale overlapping components from other mirrors */
+       lod_stale_components(lo, picked, &extent);
+       /* instantiate components for the picked mirror, start from 0 */
+       if (layout->li_opc == LAYOUT_INTENT_TRUNC) {
+               /**
+                * trunc passes [size, eof) in the intent extent: the
+                * components overlapping [size, eof) are staled above,
+                * while the components to instantiate cover [0, size).
+                */
+               extent.e_end = extent.e_start;
+       }
+       extent.e_start = 0;
+       lod_foreach_mirror_comp(lod_comp, lo, picked) {
+               if (!lu_extent_is_overlapped(&extent,
+                                            &lod_comp->llc_extent))
+                       break;
+               if (lod_comp_inited(lod_comp))
+                       continue;
+               /* log the picked mirror index; the loop counter 'i' left
+                * over from the search differs from it when the search
+                * started at a random mirror */
+               CDEBUG(D_LAYOUT, "instantiate: %u / %u\n",
+                      picked, lod_comp_index(lo, lod_comp));
+               info->lti_comp_idx[info->lti_count++] =
+                                               lod_comp_index(lo, lod_comp);
+       }
+       lo->ldo_flr_state = LCM_FL_WRITE_PENDING;
+       /* Reset the layout version once it's becoming too large.
+        * This way it can make sure that the layout version is
+        * monotonically increased in this writing era. */
+       lod_obj_inc_layout_gen(lo);
+       if (lo->ldo_layout_gen > (LCME_ID_MAX >> 1)) {
+               __u32 layout_version;
+               cfs_get_random_bytes(&layout_version, sizeof(layout_version));
+               lo->ldo_layout_gen = layout_version & 0xffff;
        }
+       rc = lod_declare_instantiate_components(env, lo, th);
+       if (rc)
+               GOTO(out, rc);
+       layout_attr->la_valid = LA_LAYOUT_VERSION;
+       layout_attr->la_layout_version = 0; /* set current version */
+       rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th);
+       if (rc)
+               GOTO(out, rc);
  out:
        if (rc)
                lod_object_free_striping(env, lo);
+       RETURN(rc);
+ }
  
- unlock:
-       dt_write_unlock(env, next);
+ static int lod_declare_update_write_pending(const struct lu_env *env,
+               struct lod_object *lo, struct md_layout_change *mlc,
+               struct thandle *th)
+ {     /* Declare the layout update for a mirrored file that is in
+        * LCM_FL_WRITE_PENDING state, for either MD_LAYOUT_WRITE or
+        * MD_LAYOUT_RESYNC; the steps of each are listed further down.
+        * Returns 0 on success, -ENODATA when every mirror is stale, or
+        * the error of a declare helper; on any error the cached striping
+        * of \a lo is freed. */
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lu_attr *layout_attr = &info->lti_layout_attr;
+       struct lod_layout_component *lod_comp;
+       struct lu_extent extent = { 0 };
+       int primary = -1;
+       int i;
+       int rc;
+       ENTRY;
+       LASSERT(lo->ldo_flr_state == LCM_FL_WRITE_PENDING);
+       LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE ||
+               mlc->mlc_opc == MD_LAYOUT_RESYNC);
+       /* look for the primary mirror: the one mirror that is not marked
+        * stale; finding a second one trips the assertion below */
+       for (i = 0; i < lo->ldo_mirror_count; i++) {
+               if (lo->ldo_mirrors[i].lme_stale)
+                       continue;
+               LASSERTF(primary < 0, DFID " has multiple primary: %u / %u",
+                        PFID(lod_object_fid(lo)),
+                        lo->ldo_mirrors[i].lme_id,
+                        lo->ldo_mirrors[primary].lme_id);
+               primary = i;
+       }
+       if (primary < 0) {
+               CERROR(DFID ": doesn't have a primary mirror\n",
+                      PFID(lod_object_fid(lo)));
+               GOTO(out, rc = -ENODATA);
+       }
+       CDEBUG(D_LAYOUT, DFID": found primary %u\n",
+              PFID(lod_object_fid(lo)), lo->ldo_mirrors[primary].lme_id);
+       LASSERT(!lo->ldo_mirrors[primary].lme_stale);
+       /* for LAYOUT_WRITE opc, it has to do the following operations:
+        * 1. stale overlapping components from stale mirrors;
+        * 2. instantiate components of the primary mirror;
+        * 3. transfer layout version to all objects of the primary;
+        *
+        * for LAYOUT_RESYNC opc, it will do:
+        * 1. instantiate components of all stale mirrors;
+        * 2. transfer layout version to all objects to close write era. */
+       if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+               LASSERT(mlc->mlc_intent != NULL);
+               extent = mlc->mlc_intent->li_extent;
+               CDEBUG(D_LAYOUT, DFID": intent to write: "DEXT"\n",
+                      PFID(lod_object_fid(lo)), PEXT(&extent));
+               /* 1. stale overlapping components */
+               lod_stale_components(lo, primary, &extent);
+               /* 2. find out the components that need instantiating.
+                * instantiate [0, mlc->mlc_intent->e_end) */
+               if (mlc->mlc_intent->li_opc == LAYOUT_INTENT_TRUNC) {
+                       /**
+                        * trunc passes [size, eof) in the intent extent:
+                        * the components overlapping [size, eof) are staled
+                        * above, while the ones to instantiate cover [0, size).
+                        */
+                       extent.e_end = extent.e_start;
+               }
+               extent.e_start = 0;
+               lod_foreach_mirror_comp(lod_comp, lo, primary) {
+                       if (!lu_extent_is_overlapped(&extent,
+                                                    &lod_comp->llc_extent))
+                               break;
+                       if (lod_comp_inited(lod_comp))
+                               continue;
+                       CDEBUG(D_LAYOUT, "write instantiate %d / %d\n",
+                              primary, lod_comp_index(lo, lod_comp));
+                       info->lti_comp_idx[info->lti_count++] =
+                                               lod_comp_index(lo, lod_comp);
+               }
+       } else { /* MD_LAYOUT_RESYNC */
+               /* figure out the components that have been instantiated
+                * in primary to decide what components should be instantiated
+                * in stale mirrors; extent ends where the leading run of
+                * initialized primary components ends */
+               lod_foreach_mirror_comp(lod_comp, lo, primary) {
+                       if (!lod_comp_inited(lod_comp))
+                               break;
+                       extent.e_end = lod_comp->llc_extent.e_end;
+               }
+               CDEBUG(D_LAYOUT,
+                      DFID": instantiate all stale components in "DEXT"\n",
+                      PFID(lod_object_fid(lo)), PEXT(&extent));
+               /* 1. instantiate all components within this extent, even
+                * non-stale components so that it won't need to instantiate
+                * those components for mirror truncate later. */
+               for (i = 0; i < lo->ldo_mirror_count; i++) {
+                       if (primary == i)
+                               continue;
+                       LASSERTF(lo->ldo_mirrors[i].lme_stale,
+                                "both %d and %d are primary\n", i, primary);
+                       lod_foreach_mirror_comp(lod_comp, lo, i) {
+                               if (!lu_extent_is_overlapped(&extent,
+                                                       &lod_comp->llc_extent))
+                                       break;
+                               if (lod_comp_inited(lod_comp))
+                                       continue;
+                               CDEBUG(D_LAYOUT, "resync instantiate %d / %d\n",
+                                      i, lod_comp_index(lo, lod_comp));
+                               info->lti_comp_idx[info->lti_count++] =
+                                               lod_comp_index(lo, lod_comp);
+                       }
+               }
+               /* change the file state to SYNC_PENDING */
+               lo->ldo_flr_state = LCM_FL_SYNC_PENDING;
+       }
  
+       rc = lod_declare_instantiate_components(env, lo, th);
+       if (rc)
+               GOTO(out, rc);
+       /* 3. transfer layout version to OST objects.
+        * transfer new layout version to OST objects so that stale writes
+        * can be denied. It also ends an era of writing by setting
+        * LU_LAYOUT_RESYNC. Normal client can never use this bit to
+        * send write RPC; only resync RPCs could do it. */
+       layout_attr->la_valid = LA_LAYOUT_VERSION;
+       layout_attr->la_layout_version = 0; /* set current version */
+       if (mlc->mlc_opc == MD_LAYOUT_RESYNC)
+               layout_attr->la_layout_version = LU_LAYOUT_RESYNC;
+       rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th);
+       if (rc)
+               GOTO(out, rc);
+       lod_obj_inc_layout_gen(lo); /* only bumped once all declares passed */
+ out:
+       if (rc)
+               lod_object_free_striping(env, lo);
+       RETURN(rc);
+ }
+ /**
+  * Declare the layout update for a mirrored file that is in
+  * LCM_FL_SYNC_PENDING state.
+  *
+  * MD_LAYOUT_WRITE: a write arrived while resync was pending; the cached
+  * state is switched back to LCM_FL_WRITE_PENDING and the request is
+  * handled by lod_declare_update_write_pending().
+  *
+  * MD_LAYOUT_RESYNC_DONE: LCME_FL_STALE is cleared on every stale component
+  * whose ID is listed in mlc_resync_ids (matched entries are overwritten
+  * with LCME_ID_INVAL), the state becomes LCM_FL_RDONLY and the
+  * XATTR_NAME_LOV update is declared.
+  *
+  * \param[in] env             execution environment
+  * \param[in,out] lo          LOD object
+  * \param[in,out] mlc         layout change descriptor
+  * \param[in] th              transaction handle
+  *
+  * \retval 0                  on success
+  * \retval -EINVAL            if a listed ID matches no stale component, or
+  *                            if no component is in sync or none was resynced
+  * \retval negative           error returned by one of the declare helpers
+  */
+ static int lod_declare_update_sync_pending(const struct lu_env *env,
+               struct lod_object *lo, struct md_layout_change *mlc,
+               struct thandle *th)
+ {
+       struct lod_thread_info  *info = lod_env_info(env);
+       unsigned sync_components = 0;
+       unsigned resync_components = 0;
+       int i;
+       int rc;
+       ENTRY;
+       LASSERT(lo->ldo_flr_state == LCM_FL_SYNC_PENDING);
+       LASSERT(mlc->mlc_opc == MD_LAYOUT_RESYNC_DONE ||
+               mlc->mlc_opc == MD_LAYOUT_WRITE);
+       CDEBUG(D_LAYOUT, DFID ": received op %d in sync pending\n",
+              PFID(lod_object_fid(lo)), mlc->mlc_opc);
+       if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+               CDEBUG(D_LAYOUT, DFID": cocurrent write to sync pending\n",
+                      PFID(lod_object_fid(lo)));
+               lo->ldo_flr_state = LCM_FL_WRITE_PENDING;
+               /* leave through RETURN() so the ENTRY above is paired */
+               RETURN(lod_declare_update_write_pending(env, lo, mlc, th));
+       }
+       /* MD_LAYOUT_RESYNC_DONE */
+       for (i = 0; i < lo->ldo_comp_cnt; i++) {
+               struct lod_layout_component *lod_comp;
+               int j;
+               lod_comp = &lo->ldo_comp_entries[i];
+               if (!(lod_comp->llc_flags & LCME_FL_STALE)) {
+                       sync_components++;
+                       continue;
+               }
+               /* stale component: clear the flag if the client reported
+                * its ID, and consume that ID from the list */
+               for (j = 0; j < mlc->mlc_resync_count; j++) {
+                       if (lod_comp->llc_id != mlc->mlc_resync_ids[j])
+                               continue;
+                       mlc->mlc_resync_ids[j] = LCME_ID_INVAL;
+                       lod_comp->llc_flags &= ~LCME_FL_STALE;
+                       resync_components++;
+                       break;
+               }
+       }
+       /* valid check: any ID left in the list matched no stale component */
+       for (i = 0; i < mlc->mlc_resync_count; i++) {
+               if (mlc->mlc_resync_ids[i] == LCME_ID_INVAL)
+                       continue;
+               CDEBUG(D_LAYOUT, DFID": lcme id %u (%d / %zd) not exist "
+                      "or already synced\n", PFID(lod_object_fid(lo)),
+                      mlc->mlc_resync_ids[i], i, mlc->mlc_resync_count);
+               GOTO(out, rc = -EINVAL);
+       }
+       if (!sync_components || !resync_components) {
+               CDEBUG(D_LAYOUT, DFID": no mirror in sync or resync\n",
+                      PFID(lod_object_fid(lo)));
+               /* tend to return an error code here to prevent
+                * the MDT from setting SoM attribute */
+               GOTO(out, rc = -EINVAL);
+       }
+       CDEBUG(D_LAYOUT, DFID": resynced %u/%zu components\n",
+              PFID(lod_object_fid(lo)),
+              resync_components, mlc->mlc_resync_count);
+       lo->ldo_flr_state = LCM_FL_RDONLY;
+       lod_obj_inc_layout_gen(lo);
+       info->lti_buf.lb_len = lod_comp_md_size(lo, false);
+       rc = lod_sub_declare_xattr_set(env, lod_object_child(lo),
+                                      &info->lti_buf, XATTR_NAME_LOV, 0, th);
+ out:
+       if (rc)
+               lod_object_free_striping(env, lo);
+       RETURN(rc);
+ }
+ static int lod_declare_layout_change(const struct lu_env *env,
+               struct dt_object *dt, struct md_layout_change *mlc,
+               struct thandle *th)
+ {     /* Declare a layout change on \a dt: load the striping under the
+        * write lock, reset the per-thread layout scratch data and hand
+        * over to the handler for the file's current FLR state.
+        * Returns -EINVAL for objects that are not regular files, do not
+        * exist or are remote, -ENOTSUPP for an unknown FLR state,
+        * otherwise the result of the selected handler. */
+       struct lod_thread_info  *info = lod_env_info(env);
+       struct lod_object *lo = lod_dt_obj(dt);
+       int rc;
+       ENTRY;
+       if (!S_ISREG(dt->do_lu.lo_header->loh_attr) || !dt_object_exists(dt) ||
+           dt_object_remote(dt_object_child(dt)))
+               RETURN(-EINVAL);
+       lod_write_lock(env, dt, 0); /* NOTE(review): released through
+                                    * dt_write_unlock() at "out" - confirm
+                                    * the two pair up on the same lock */
+       rc = lod_load_striping_locked(env, lo);
+       if (rc)
+               GOTO(out, rc);
+       LASSERT(lo->ldo_comp_cnt > 0);
+       rc = lod_layout_data_init(info, lo->ldo_comp_cnt);
+       if (rc)
+               GOTO(out, rc);
+       switch (lo->ldo_flr_state) { /* one handler per FLR state */
+       case LCM_FL_NOT_FLR:
+               rc = lod_declare_update_plain(env, lo, mlc->mlc_intent,
+                                             &mlc->mlc_buf, th);
+               break;
+       case LCM_FL_RDONLY:
+               rc = lod_declare_update_rdonly(env, lo, mlc, th);
+               break;
+       case LCM_FL_WRITE_PENDING:
+               rc = lod_declare_update_write_pending(env, lo, mlc, th);
+               break;
+       case LCM_FL_SYNC_PENDING:
+               rc = lod_declare_update_sync_pending(env, lo, mlc, th);
+               break;
+       default:
+               rc = -ENOTSUPP;
+               break;
+       }
+ out:
+       dt_write_unlock(env, dt);
        RETURN(rc);
  }
  
   * Instantiate layout component objects which covers the intent write offset.
   */
  static int lod_layout_change(const struct lu_env *env, struct dt_object *dt,
-                            struct layout_intent *layout,
-                            const struct lu_buf *buf, struct thandle *th)
+                            struct md_layout_change *mlc, struct thandle *th)
  {     /* Run lod_striped_create() and then, when LA_LAYOUT_VERSION is
+        * flagged in the per-thread lti_layout_attr, OR the object's
+        * current layout generation into la_layout_version and apply it
+        * with lod_attr_set().  \a mlc is not referenced in this body.
+        * Returns 0 on success or the first error of the two calls. */
        struct lu_attr *attr = &lod_env_info(env)->lti_attr;
+       struct lu_attr *layout_attr = &lod_env_info(env)->lti_layout_attr;
+       struct lod_object *lo = lod_dt_obj(dt);
+       int rc;
  
-       RETURN(lod_striped_create(env, dt, attr, NULL, th));
+       rc = lod_striped_create(env, dt, attr, NULL, th);
+       if (!rc && layout_attr->la_valid & LA_LAYOUT_VERSION) {
+               layout_attr->la_layout_version |= lo->ldo_layout_gen;
+               rc = lod_attr_set(env, dt, layout_attr, th);
+       }
+       return rc;
  }
  
  struct dt_object_operations lod_obj_ops = {
diff --combined lustre/mdc/mdc_lib.c
@@@ -116,11 -116,7 +116,11 @@@ static void mdc_pack_name(struct ptlrpc
  
        cpy_len = strlcpy(buf, name, buf_size);
  
 -      LASSERT(cpy_len == name_len && lu_name_is_valid_2(buf, cpy_len));
 +      LASSERT(lu_name_is_valid_2(buf, cpy_len));
 +      if (cpy_len != name_len)
 +              CDEBUG(D_DENTRY, "%s: %s len %zd != %zd, concurrent rename?\n",
 +                     req->rq_export->exp_obd->obd_name, buf, name_len,
 +                     cpy_len);
  }
  
  void mdc_file_secctx_pack(struct ptlrpc_request *req, const char *secctx_name,
@@@ -444,8 -440,7 +444,7 @@@ static void mdc_intent_close_pack(struc
        struct ldlm_lock        *lock;
        enum mds_op_bias         bias = op_data->op_bias;
  
-       if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
-                     MDS_RENAME_MIGRATE)))
+       if (!(bias & (MDS_CLOSE_INTENT | MDS_RENAME_MIGRATE)))
                return;
  
        data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
  
        data->cd_data_version = op_data->op_data_version;
        data->cd_fid = op_data->op_fid2;
+       if (bias & MDS_CLOSE_RESYNC_DONE) {
+               struct close_data_resync_done *sync = &data->cd_resync;
+               CLASSERT(sizeof(data->cd_resync) <= sizeof(data->cd_reserved));
+               sync->resync_count = op_data->op_data_size / sizeof(__u32);
+               if (sync->resync_count <= INLINE_RESYNC_ARRAY_SIZE) {
+                       memcpy(sync->resync_ids_inline, op_data->op_data,
+                              op_data->op_data_size);
+               } else {
+                       size_t count = sync->resync_count;
+                       memcpy(req_capsule_client_get(&req->rq_pill, &RMF_U32),
+                               op_data->op_data, count * sizeof(__u32));
+               }
+       }
  }
  
  void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
diff --combined lustre/mdt/mdt_handler.c
@@@ -732,6 -732,8 +732,8 @@@ void mdt_pack_attr2body(struct mdt_thre
                        else
                                b->mbo_blocks = 1;
                        b->mbo_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+               } else if (info->mti_som_valid) { /* som is valid */
+                       b->mbo_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
                }
        }
  
@@@ -992,6 -994,9 +994,9 @@@ int mdt_attr_get_complex(struct mdt_thr
                rc = mo_attr_get(env, next, ma);
                if (rc)
                        GOTO(out, rc);
+               if (S_ISREG(mode))
+                       (void) mdt_get_som(info, o, &ma->ma_attr);
                ma->ma_valid |= MA_INODE;
        }
  
   *
   * \param[in] info    thread environment
   * \param[in] obj     object
-  * \param[in] layout  layout intent
-  * \param[in] buf     buffer containing client's lovea, could be empty
+  * \param[in] layout  layout change descriptor
   *
   * \retval 0  on success
   * \retval < 0        error code
   */
- static int mdt_layout_change(struct mdt_thread_info *info,
-                            struct mdt_object *obj,
-                            struct layout_intent *layout,
-                            const struct lu_buf *buf)
+ int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
+                     struct md_layout_change *layout)
  {
        struct mdt_lock_handle *lh = &info->mti_lh[MDT_LH_LOCAL];
        int rc;
        ENTRY;
  
-       CDEBUG(D_INFO, "got layout change request from client: "
-              "opc:%u flags:%#x extent[%#llx,%#llx)\n",
-              layout->li_opc, layout->li_flags,
-              layout->li_start, layout->li_end);
-       if (layout->li_start >= layout->li_end) {
-               CERROR("Recieved an invalid layout change range [%llu, %llu) "
-                      "for "DFID"\n", layout->li_start, layout->li_end,
-                      PFID(mdt_object_fid(obj)));
-               RETURN(-EINVAL);
-       }
+       if (!mdt_object_exists(obj))
+               GOTO(out, rc = -ENOENT);
  
        if (!S_ISREG(lu_object_attr(&obj->mot_obj)))
                GOTO(out, rc = -EINVAL);
  
        /* take layout lock to prepare layout change */
        mdt_lock_reg_init(lh, LCK_EX);
-       rc = mdt_object_lock(info, obj, lh,
-                            MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
+       rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LAYOUT);
        if (rc)
                GOTO(out, rc);
  
-       rc = mo_layout_change(info->mti_env, mdt_object_child(obj), layout,
-                             buf);
+       rc = mo_layout_change(info->mti_env, mdt_object_child(obj), layout);
  
        mdt_object_unlock(info, obj, lh, 1);
  out:
@@@ -1733,7 -1725,7 +1725,7 @@@ static int mdt_getattr_name_lock(struc
                          if (ma->ma_valid & MA_INODE &&
                              ma->ma_attr.la_valid & LA_CTIME &&
                              info->mti_mdt->mdt_namespace->ns_ctime_age_limit +
 -                                ma->ma_attr.la_ctime < cfs_time_current_sec())
 +                              ma->ma_attr.la_ctime < ktime_get_real_seconds())
                                  child_bits |= MDS_INODELOCK_UPDATE;
                  }
  
@@@ -2130,7 -2122,8 +2122,8 @@@ static int mdt_reint(struct tgt_session
                [REINT_OPEN]     = &RQF_MDS_REINT_OPEN,
                [REINT_SETXATTR] = &RQF_MDS_REINT_SETXATTR,
                [REINT_RMENTRY]  = &RQF_MDS_REINT_UNLINK,
-               [REINT_MIGRATE]  = &RQF_MDS_REINT_RENAME
+               [REINT_MIGRATE]  = &RQF_MDS_REINT_RENAME,
+               [REINT_RESYNC]   = &RQF_MDS_REINT_RESYNC,
        };
  
        ENTRY;
@@@ -3282,6 -3275,7 +3275,7 @@@ void mdt_thread_info_init(struct ptlrpc
          info->mti_opdata = 0;
        info->mti_big_lmm_used = 0;
        info->mti_big_acl_used = 0;
+       info->mti_som_valid = 0;
  
          info->mti_spec.no_create = 0;
        info->mti_spec.sp_rm_entry = 0;
@@@ -3738,10 -3732,10 +3732,10 @@@ static int mdt_intent_layout(enum mdt_i
                             __u64 flags)
  {
        struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_LAYOUT];
-       struct layout_intent *layout;
-       struct lu_fid *fid;
+       struct md_layout_change layout = { .mlc_opc = MD_LAYOUT_NOP };
+       struct layout_intent *intent;
+       struct lu_fid *fid = &info->mti_tmp_fid2;
        struct mdt_object *obj = NULL;
-       bool layout_change = false;
        int layout_size = 0;
        int rc = 0;
        ENTRY;
                RETURN(-EINVAL);
        }
  
-       layout = req_capsule_client_get(info->mti_pill, &RMF_LAYOUT_INTENT);
-       if (layout == NULL)
+       fid_extract_from_res_name(fid, &(*lockp)->l_resource->lr_name);
+       intent = req_capsule_client_get(info->mti_pill, &RMF_LAYOUT_INTENT);
+       if (intent == NULL)
                RETURN(-EPROTO);
  
-       switch (layout->li_opc) {
+       CDEBUG(D_INFO, DFID "got layout change request from client: "
+              "opc:%u flags:%#x extent "DEXT"\n",
+              PFID(fid), intent->li_opc, intent->li_flags,
+              PEXT(&intent->li_extent));
+       switch (intent->li_opc) {
        case LAYOUT_INTENT_TRUNC:
        case LAYOUT_INTENT_WRITE:
-               layout_change = true;
+               layout.mlc_opc = MD_LAYOUT_WRITE;
+               layout.mlc_intent = intent;
                break;
        case LAYOUT_INTENT_ACCESS:
                break;
        case LAYOUT_INTENT_RELEASE:
        case LAYOUT_INTENT_RESTORE:
                CERROR("%s: Unsupported layout intent opc %d\n",
-                      mdt_obd_name(info->mti_mdt), layout->li_opc);
+                      mdt_obd_name(info->mti_mdt), intent->li_opc);
                rc = -ENOTSUPP;
                break;
        default:
                CERROR("%s: Unknown layout intent opc %d\n",
-                      mdt_obd_name(info->mti_mdt), layout->li_opc);
+                      mdt_obd_name(info->mti_mdt), intent->li_opc);
                rc = -EINVAL;
                break;
        }
        if (rc < 0)
                RETURN(rc);
  
-       fid = &info->mti_tmp_fid2;
-       fid_extract_from_res_name(fid, &(*lockp)->l_resource->lr_name);
        /* Get lock from request for possible resent case. */
        mdt_intent_fixup_resent(info, *lockp, lhc, flags);
  
                GOTO(out_obj, rc);
  
  
-       if (layout_change) {
-               struct lu_buf *buf = &info->mti_buf;
+       if (layout.mlc_opc != MD_LAYOUT_NOP) {
+               struct lu_buf *buf = &layout.mlc_buf;
  
                /**
                 * mdt_layout_change is a reint operation, when the request
                 * lovea, then it's a replay of the layout intent write
                 * RPC.
                 */
-               rc = mdt_layout_change(info, obj, layout, buf);
+               rc = mdt_layout_change(info, obj, &layout);
                if (rc)
                        GOTO(out_obj, rc);
        }
@@@ -147,10 -147,10 +147,10 @@@ struct coordinator 
                                                       * list */
        struct mutex             cdt_restore_lock;    /**< protect restore
                                                       * list */
 -      cfs_time_t               cdt_loop_period;     /**< llog scan period */
 -      cfs_time_t               cdt_grace_delay;     /**< request grace
 +      time64_t                 cdt_loop_period;     /**< llog scan period */
 +      time64_t                 cdt_grace_delay;     /**< request grace
                                                       * delay */
 -      cfs_time_t               cdt_active_req_timeout; /**< request timeout */
 +      time64_t                 cdt_active_req_timeout; /**< request timeout */
        __u32                    cdt_default_archive_id; /**< archive id used
                                                       * when none are
                                                       * specified */
@@@ -264,8 -264,6 +264,8 @@@ struct mdt_device 
  #define MDT_SERVICE_WATCHDOG_FACTOR   (2)
  #define MDT_COS_DEFAULT         (0)
  
 +#define ENOENT_VERSION 1      /** 'virtual' version of non-existent object */
 +
  struct mdt_object {
        struct lu_object_header mot_header;
        struct lu_object        mot_obj;
@@@ -400,7 -398,8 +400,8 @@@ struct mdt_thread_info 
                                   mti_cross_ref:1,
        /* big_lmm buffer was used and must be used in reply */
                                   mti_big_lmm_used:1,
-                                  mti_big_acl_used:1;
+                                  mti_big_acl_used:1,
+                                  mti_som_valid:1;
  
          /* opdata for mdt_reint_open(), has the same as
           * ldlm_reply:lock_policy_res1.  mdt_update_last_rcvd() stores this
        char                       mti_xattr_buf[128];
        struct ldlm_enqueue_info   mti_einfo;
        struct tg_reply_data      *mti_reply_data;
+       struct lustre_som_attrs    mti_som;
+       /* FLR: layout change API */
+       struct md_layout_change    mti_layout;
  };
  
  extern struct lu_context_key mdt_thread_key;
@@@ -504,8 -508,8 +510,8 @@@ struct cdt_agent_req 
        struct obd_uuid          car_uuid;         /**< agent doing the req. */
        __u32                    car_archive_id;   /**< archive id */
        int                      car_canceled;     /**< request was canceled */
 -      cfs_time_t               car_req_start;    /**< start time */
 -      cfs_time_t               car_req_update;   /**< last update time */
 +      time64_t                 car_req_start;    /**< start time */
 +      time64_t                 car_req_update;   /**< last update time */
        struct hsm_action_item  *car_hai;          /**< req. to the agent */
        struct cdt_req_progress  car_progress;     /**< track data mvt
                                                    *   progress */
@@@ -788,6 -792,8 +794,8 @@@ int mdt_fix_reply(struct mdt_thread_inf
  int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
                           struct md_attr *);
  void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
+ int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
+                     struct md_layout_change *spec);
  
  struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len);
  const struct lu_buf *mdt_buf_const(const struct lu_env *env,
@@@ -1113,6 -1119,12 +1121,12 @@@ static inline enum ldlm_mode mdt_mdl_mo
        return mdt_dlm_lock_modes[mode];
  }
  
+ /* mdt_som.c */
+ int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj,
+               struct lu_attr *attr);
+ int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj,
+               struct lu_attr *attr);
  /* mdt_lvb.c */
  extern struct ldlm_valblock_ops mdt_lvbo;
  int mdt_dom_lvb_is_valid(struct ldlm_resource *res);
diff --combined lustre/osc/osc_request.c
@@@ -1411,7 -1411,7 +1411,7 @@@ static void dump_all_bulk_pages(struct 
  }
  
  static int
 -check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
 +check_write_checksum(struct obdo *oa, const struct lnet_process_id *peer,
                                __u32 client_cksum, __u32 server_cksum,
                                struct osc_brw_async_args *aa)
  {
@@@ -1778,7 -1778,7 +1778,7 @@@ static int brw_interpret(const struct l
          CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
          /* When the server returns -EINPROGRESS, the client should always retry
           * regardless of the number of times the bulk was resent already. */
-       if (osc_recoverable_error(rc)) {
+       if (osc_recoverable_error(rc) && !req->rq_no_delay) {
                if (req->rq_import_generation !=
                    req->rq_import->imp_generation) {
                        CDEBUG(D_HA, "%s: resend cross eviction for object: "
  
        list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
                list_del_init(&ext->oe_link);
-               osc_extent_finish(env, ext, 1, rc);
+               osc_extent_finish(env, ext, 1,
+                                 rc && req->rq_no_delay ? -EWOULDBLOCK : rc);
        }
        LASSERT(list_empty(&aa->aa_exts));
        LASSERT(list_empty(&aa->aa_oaps));
@@@ -1927,9 -1928,11 +1928,11 @@@ int osc_build_rpc(const struct lu_env *
        int                             page_count = 0;
        bool                            soft_sync = false;
        bool                            interrupted = false;
+       bool                            ndelay = false;
        int                             i;
        int                             grant = 0;
        int                             rc;
+       __u32                           layout_version = 0;
        struct list_head                rpc_list = LIST_HEAD_INIT(rpc_list);
        struct ost_body                 *body;
        ENTRY;
                mem_tight |= ext->oe_memalloc;
                grant += ext->oe_grants;
                page_count += ext->oe_nr_pages;
+               layout_version = MAX(layout_version, ext->oe_layout_version);
                if (obj == NULL)
                        obj = ext->oe_obj;
        }
                        if (oap->oap_interrupted)
                                interrupted = true;
                }
+               if (ext->oe_ndelay)
+                       ndelay = true;
        }
  
        /* first page in the list */
        crattr->cra_oa = oa;
        cl_req_attr_set(env, osc2cl(obj), crattr);
  
-       if (cmd == OBD_BRW_WRITE)
+       if (cmd == OBD_BRW_WRITE) {
                oa->o_grant_used = grant;
+               if (layout_version > 0) {
+                       CDEBUG(D_LAYOUT, DFID": write with layout version %u\n",
+                              PFID(&oa->o_oi.oi_fid), layout_version);
+                       oa->o_layout_version = layout_version;
+                       oa->o_valid |= OBD_MD_LAYOUT_VERSION;
+               }
+       }
  
        sort_brw_pages(pga, page_count);
        rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 0);
        oap->oap_request = ptlrpc_request_addref(req);
        if (interrupted && !req->rq_intr)
                ptlrpc_mark_interrupted(req);
+       if (ndelay) {
+               req->rq_no_resend = req->rq_no_delay = 1;
+               /* probably set a shorter timeout value.
+                * to handle ETIMEDOUT in brw_interpret() correctly. */
+               /* lustre_msg_set_timeout(req, req->rq_timeout / 2); */
+       }
  
        /* Need to update the timestamps after the request is built in case
         * we race with setattr (locally or in queue at OST).  If OST gets
@@@ -68,8 -68,8 +68,8 @@@ if ! combined_mgs_mds; the
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT  43b     53b     54b"
        # bug number for skipped test: LU-9875 LU-9879 LU-9879 LU-9879 LU-9879
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT  70e     80      84      87      100"
 -      # bug number for skipped test: LU-8110 LU-9400 LU-9879 LU-9879 LU-9879
 -      ALWAYS_EXCEPT="$ALWAYS_EXCEPT  102     103     104     105     107"
 +      # bug number for skipped test: LU-8110 LU-9879 LU-9879 LU-9879
 +      ALWAYS_EXCEPT="$ALWAYS_EXCEPT  102     104     105     107"
  fi
  
  # pass "-E lazy_itable_init" to mke2fs to speed up the formatting time
@@@ -2083,7 -2083,7 +2083,7 @@@ t32_test() 
                                error_noexit "Verify DoM creation"
                                return 1
                        }
-                       [ $($LFS getstripe -L $tmp/mnt/lustre/dom) == 100 ] || {
+                       [ $($LFS getstripe -L $tmp/mnt/lustre/dom) == "mdt" ] || {
                                error_noexit "Verify a DoM file"
                                return 1
                        }
@@@ -5921,9 -5921,6 +5921,9 @@@ cleanup_82b() 
        # Remove OSTs from a pool and destroy the pool.
        destroy_pool $ost_pool || true
  
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
        restore_ostindex
  }
  
@@@ -5963,10 -5960,6 +5963,10 @@@ test_82b() { # LU-466
        done
  
        mount_client $MOUNT || error "mount client $MOUNT failed"
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
 +
        wait_osts_up
        $LFS df $MOUNT || error "$LFS df $MOUNT failed"
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
@@@ -7316,7 -7309,7 +7316,7 @@@ test_renamefs() 
  
        echo "rename $FSNAME to $newname"
  
 -      if [ ! combined_mgs_mds ]; then
 +      if ! combined_mgs_mds ; then
                local facet=$(mgsdevname)
  
                do_facet mgs \
@@@ -7388,9 -7381,6 +7388,9 @@@ test_103() 
        cp $LUSTRE/tests/test-framework.sh $DIR/$tdir ||
                error "(2) Fail to copy test-framework.sh"
  
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
        do_facet mgs $LCTL pool_new $FSNAME.pool1 ||
                error "(3) Fail to create $FSNAME.pool1"
        # name the pool name as the fsname
        $SETSTRIPE -p $FSNAME $DIR/$tdir/d0 ||
                error "(6) Fail to setstripe on $DIR/$tdir/d0"
  
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
        KEEP_ZPOOL=true
        stopall
  
        FSNAME="mylustre"
        setupall
  
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
        test_103_check_pool $save_fsname 7
  
        if [ $OSTCOUNT -ge 2 ]; then
  
        $SETSTRIPE -p $save_fsname $DIR/$tdir/f0 ||
                error "(16) Fail to setstripe on $DIR/$tdir/f0"
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
  
        stopall
  
        FSNAME="tfs"
        setupall
  
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
        test_103_check_pool $save_fsname 17
  
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
        stopall
  
        test_renamefs $save_fsname
@@@ -286,13 -286,14 +286,13 @@@ copytool_setup() 
        [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME
        local copytool_log=$prefix.copytool${arc_id}_log.$agent.log
  
 +      stack_trap cleanup EXIT
        do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1"
        if [[ $? !=  0 ]]; then
                [[ $HSMTOOL_NOERROR == true ]] ||
                        error "start copytool $facet on $agent failed"
                echo "start copytool $facet on $agent failed"
        fi
 -
 -      trap cleanup EXIT
  }
  
  get_copytool_event_log() {
@@@ -1211,7 -1212,7 +1211,7 @@@ test_11a() 
        echo -n "Verifying released pattern: "
        local PTRN=$($GETSTRIPE -L $f)
        echo $PTRN
-       [[ $PTRN == 80000001 ]] || error "Is not released"
+       [[ $PTRN == released ]] || error "Is not released"
        local fid=$(path2fid $f)
        echo "Verifying new fid $fid in archive"
  
@@@ -2173,6 -2174,7 +2173,6 @@@ test_24c() 
  run_test 24c "check that user,group,other request masks work"
  
  cleanup_test_24d() {
 -      trap 0
        mount -o remount,rw $MOUNT2
        zconf_umount $(facet_host $SINGLEAGT) "$MOUNT3"
  }
@@@ -2199,6 -2201,7 +2199,6 @@@ test_24d() 
        mount -o remount,ro $MOUNT2
  
        do_nodes $(comma_list $(nodes_list)) $LCTL clear
 -      start_full_debug_logging
  
        fid2=$(path2fid $file2)
        [ "$fid1" == "$fid2" ] ||
        $LFS hsm_archive $file1 || error "Fail to archive $file1"
        wait_request_state $fid1 ARCHIVE SUCCEED
  
 -      stop_full_debug_logging
 -
        $LFS hsm_release $file1
        $LFS hsm_restore $file2
        wait_request_state $fid1 RESTORE SUCCEED
        $LFS hsm_release $file2 &&
                error "release should fail on read-only mount"
  
 -      copytool_cleanup
 -      cleanup_test_24d
 +      return 0
  }
  run_test 24d "check that read-only mounts are respected"
  
@@@ -3069,7 -3075,8 +3069,7 @@@ test_34() 
        local there=$(ps -o pid,comm hp $pid >/dev/null)
        [[ -z $there ]] || error "Restore initiator does not exit"
  
 -      local rc=$(wait $pid)
 -      [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
 +      wait $pid || error "Restore initiator failed with $?"
  
        copytool_cleanup
  }
@@@ -3105,7 -3112,8 +3105,7 @@@ test_35() 
        local there=$(ps -o pid,comm hp $pid >/dev/null)
        [[ -z $there ]] || error "Restore initiator does not exit"
  
 -      local rc=$(wait $pid)
 -      [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
 +      wait $pid || error "Restore initiator failed with $?"
  
        fid2=$(path2fid $f)
        [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1"
@@@ -3143,7 -3151,9 +3143,7 @@@ test_36() 
        [[ -z $there ]] ||
                error "Restore initiator does not exit"
  
 -      local rc=$(wait $pid)
 -      [[ $rc -eq 0 ]] ||
 -              error "Restore initiator failed with $rc"
 +      wait $pid || error "Restore initiator failed with $?"
  
        copytool_cleanup
  }
diff --combined lustre/tests/sanity.sh
@@@ -2059,10 -2059,6 +2059,10 @@@ test_27D() 
        local ost_list=$(seq $first_ost $ost_step $last_ost)
        local ost_range="$first_ost $last_ost $ost_step"
  
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
 +
        test_mkdir $DIR/$tdir
        pool_add $POOL || error "pool_add failed"
        pool_add_targets $POOL $ost_range || error "pool_add_targets failed"
                error "llapi_layout_test failed"
  
        destroy_test_pools || error "destroy test pools failed"
 +
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
  }
  run_test 27D "validate llapi_layout API"
  
@@@ -5374,68 -5366,6 +5374,68 @@@ test_56aa() { # LU-593
  }
  run_test 56aa "lfs find --size under striped dir"
  
 +test_56ba() {
 +      # Create composite files with one component
 +      TDIR=$DIR/$tdir/1Mfiles
 +      setup_56 5 1 "--component-end 1M"
 +      # Create composite files with three components
 +      TDIR=$DIR/$tdir/2Mfiles
 +      setup_56 5 2 "-E 2M -E 4M -E 6M"
 +      TDIR=$DIR/$tdir
 +      # Create non-composite files
 +      createmany -o $TDIR/${tfile}- 10
 +
 +      local nfiles=$($LFIND --component-end 1M --type f $TDIR | wc -l)
 +      [[ $nfiles == 10 ]] ||
 +              error "lfs find -E 1M found $nfiles != 10 files"
 +
 +      nfiles=$($LFIND ! -E 1M --type f $TDIR | wc -l)
 +      [[ $nfiles == 25 ]] ||
 +              error "lfs find ! -E 1M found $nfiles != 25 files"
 +
 +      # All files have a component that starts at 0
 +      local nfiles=$($LFIND --component-start 0 --type f $TDIR | wc -l)
 +      [[ $nfiles == 35 ]] ||
 +              error "lfs find --component-start 0 found $nfiles != 35 files"
 +
 +      nfiles=$($LFIND --component-start 2M --type f $TDIR | wc -l)
 +      [[ $nfiles == 15 ]] ||
 +              error "$LFIND --component-start 2M found $nfiles != 15 files"
 +
 +      # All files created here have a component that does not start at 2M
 +      nfiles=$($LFIND ! --component-start 2M --type f $TDIR | wc -l)
 +      [[ $nfiles == 35 ]] ||
 +              error "$LFIND ! --component-start 2M found $nfiles != 35 files"
 +
 +      # Find files with a specified number of components
 +      local nfiles=$($LFIND --component-count 3 --type f $TDIR | wc -l)
 +      [[ $nfiles == 15 ]] ||
 +              error "lfs find --component-count 3 found $nfiles != 15 files"
 +
 +      # Remember non-composite files have a component count of zero
 +      local nfiles=$($LFIND --component-count 0 --type f $TDIR | wc -l)
 +      [[ $nfiles == 10 ]] ||
 +              error "lfs find --component-count 0 found $nfiles != 10 files"
 +
 +      nfiles=$($LFIND ! --component-count 3 --type f $TDIR | wc -l)
 +      [[ $nfiles == 20 ]] ||
 +              error "$LFIND ! --component-count 3 found $nfiles != 20 files"
 +
 +      # All files have a flag called "init"
 +      local nfiles=$($LFIND --component-flags init --type f $TDIR | wc -l)
 +      [[ $nfiles == 35 ]] ||
 +              error "$LFIND --component-flags init found $nfiles != 35 files"
 +
 +      # Multi-component files will have a component not initialized
 +      local nfiles=$($LFIND ! --component-flags init --type f $TDIR | wc -l)
 +      [[ $nfiles == 15 ]] ||
 +              error "$LFIND !--component-flags init found $nfiles != 15 files"
 +
 +      rm -rf $TDIR
 +
 +}
 +run_test 56ba "test lfs find --component-end, -start, -count, and -flags"
 +
  test_57a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
        # note test will not do anything if MDS is not local
@@@ -10063,7 -9993,7 +10063,7 @@@ test_133g() 
                -type f \
                -not -name force_lbug \
                -not -name changelog_mask \
-               -exec badarea_io '{}' \; &> /dev/null ||
+               -exec badarea_io '{}' \; ||
                error "find $proc_dirs failed"
  
        local facet
                        -type f \
                        -not -name force_lbug \
                        -not -name changelog_mask \
-                       -exec badarea_io '{}' \\\; &> /dev/null ||
+                       -exec badarea_io '{}' \\\; ||
                                error "$facet find $facet_proc_dirs failed"
        done
  
@@@ -12301,12 -12231,8 +12301,12 @@@ test_200() 
        local test_path=$POOL_ROOT/$POOL_DIR_NAME
        local file_dir=$POOL_ROOT/file_tst
        local subdir=$test_path/subdir
 -
        local rc=0
 +
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
 +
        while : ; do
                # former test_200a test_200b
                pool_add $POOL                          || { rc=$? ; break; }
                pool_create_files $POOL $file_dir $files "$ost_list" \
                                                        || { rc=$? ; break; }
                # former test_200g test_200h
 -              pool_lfs_df $POOL                       || { rc=$? ; break; }
 +              pool_lfs_df $POOL                       || { rc=$? ; break; }
                pool_file_rel_path $POOL $test_path     || { rc=$? ; break; }
  
                # former test_201a test_201b test_201c
  
                local f=$test_path/$tfile
                pool_remove_all_targets $POOL $f        || { rc=$? ; break; }
 -              pool_remove $POOL $f                    || { rc=$? ; break; }
 +              pool_remove $POOL $f                    || { rc=$? ; break; }
                break
        done
  
        destroy_test_pools
 +
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
        return $rc
  }
  run_test 200 "OST pools"
@@@ -13020,10 -12942,6 +13020,10 @@@ test_220() { #LU-32
  
        $LFS df -i
  
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
 +
        do_facet ost$((OSTIDX + 1)) lctl set_param fail_val=-1
        #define OBD_FAIL_OST_ENOINO              0x229
        do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0x229
        do_facet ost$((OSTIDX + 1)) lctl set_param fail_val=0
        do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0
  
 -      do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $OST || return 4
 -      do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME || return 5
 +      do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $OST ||
 +              error "$LCTL pool_remove $FSNAME.$TESTNAME $OST failed"
 +      do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
 +              error "$LCTL pool_destroy $FSNAME.$TESTNAME failed"
        echo "unlink $MDSOBJS files @$next_id..."
 -      unlinkmany $DIR/$tdir/f $MDSOBJS || return 6
 +      unlinkmany $DIR/$tdir/f $MDSOBJS || error "unlinkmany failed"
 +
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
  }
  run_test 220 "preallocated MDS objects still used if ENOSPC from OST"
  
@@@ -13235,6 -13147,8 +13235,6 @@@ test_225b () 
              skip_env "Need to mount OST to test" && return
        fi
  
 -      [ $MDSCOUNT -ge 2 ] &&
 -              skip "skipping now for more than one MDT" && return
         local mds=$(facet_host $SINGLEMDS)
         local target=$(do_nodes $mds 'lctl dl' | \
                        awk "{if (\$2 == \"UP\" && \$3 == \"mdt\") {print \$4}}")
@@@ -13486,7 -13400,7 +13486,7 @@@ test_229() { # LU-2482, LU-344
        $GETSTRIPE -v $DIR/$tfile
  
        local pattern=$($GETSTRIPE -L $DIR/$tfile)
-       [ X"$pattern" = X"80000001" ] || error "pattern error ($pattern)"
+       [ X"$pattern" = X"released" ] || error "pattern error ($pattern)"
  
        local stripe_count=$($GETSTRIPE -c $DIR/$tfile) || error "getstripe"
        [ $stripe_count -eq 2 ] || error "stripe count not 2 ($stripe_count)"
@@@ -14585,9 -14499,6 +14585,9 @@@ test_253() 
                        osp.$mdtosc_proc1.reserved_mb_low)
        echo "prev high watermark $last_wm_h, prev low watermark $last_wm_l"
  
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
        create_pool $FSNAME.$TESTNAME || error "Pool creation failed"
        do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $ost_name ||
                error "Adding $ost_name to pool failed"
                error "Remove $ost_name from pool failed"
        do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
                error "Pool destroy fialed"
 +
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
  }
  run_test 253 "Check object allocation limit"
  
@@@ -14967,10 -14874,6 +14967,10 @@@ test_255c() 
        local difference
        local i
        local rc
 +
 +      [ $(lustre_version_code ost1) -lt $(version_code 2.10.50) ] &&
 +              skip "lustre < 2.10.53 does not support lockahead" && return
 +
        test_mkdir -p $DIR/$tdir
        $SETSTRIPE -i 0 $DIR/$tdir
  
                       ldlm.namespaces.$FSNAME-OST0000*osc-f*.lock_unused_count)
                difference="$((new_count - count))"
  
 -              # Test 15 output is divided by 1000 to map down to valid return
 +              # Test 15 output is divided by 100 to map down to valid return
                if [ $i -eq 15 ]; then
 -                      rc="$((rc * 1000))"
 +                      rc="$((rc * 100))"
                fi
  
                if [ $difference -ne $rc ]; then
@@@ -15198,7 -15101,7 +15198,7 @@@ test_270a() 
        $LFS setstripe -E 1M -L mdt $dom ||
                error "Can't create DoM layout"
  
-       [ $($LFS getstripe -L $dom) == 100 ] || error "bad pattern"
+       [ $($LFS getstripe -L $dom) == "mdt" ] || error "bad pattern"
        [ $($LFS getstripe -c $dom) == 0 ] || error "bad stripe count"
        [ $($LFS getstripe -S $dom) == 1048576 ] || error "bad stripe size"
  
@@@ -15304,7 -15207,7 +15304,7 @@@ test_270c() 
  
        # check files inherit DoM EA
        touch $DIR/$tdir/first
-       [ $($GETSTRIPE -L $DIR/$tdir/first) == 100 ] ||
+       [ $($GETSTRIPE -L $DIR/$tdir/first) == "mdt" ] ||
                error "bad pattern"
        [ $($LFS getstripe -c $DIR/$tdir/first) == 0 ] ||
                error "bad stripe count"
        # check directory inherits DoM EA and uses it as default
        mkdir $DIR/$tdir/subdir
        touch $DIR/$tdir/subdir/second
-       [ $($LFS getstripe -L $DIR/$tdir/subdir/second) == 100 ] ||
+       [ $($LFS getstripe -L $DIR/$tdir/subdir/second) == "mdt" ] ||
                error "bad pattern in sub-directory"
        [ $($LFS getstripe -c $DIR/$tdir/subdir/second) == 0 ] ||
                error "bad stripe count in sub-directory"
@@@ -15337,7 -15240,7 +15337,7 @@@ test_270d() 
        touch $DIR/$tdir/subdir/f2
        [ $($LFS getstripe -c $DIR/$tdir/subdir/f2) == 1 ] ||
                error "wrong default striping in file 2"
-       [ $($LFS getstripe -L $DIR/$tdir/subdir/f2) == 1 ] ||
+       [ $($LFS getstripe -L $DIR/$tdir/subdir/f2) == "raid0" ] ||
                error "bad pattern in file 2"
        return 0
  }
@@@ -16799,11 -16702,8 +16799,11 @@@ test_406() 
        local def_stripe_size=$($GETSTRIPE -S $MOUNT)
        local def_stripe_offset=$($GETSTRIPE -i $MOUNT)
        local def_pool=$($GETSTRIPE -p $MOUNT)
 -
        local test_pool=$TESTNAME
 +
 +      if ! combined_mgs_mds ; then
 +              mount_mgs_client
 +      fi
        pool_add $test_pool || error "pool_add failed"
        pool_add_targets $test_pool 0 $(($OSTCOUNT - 1)) 1 ||
                error "pool_add_targets failed"
        local f=$DIR/$tdir/$tfile
        pool_remove_all_targets $test_pool $f
        pool_remove $test_pool $f
 +
 +      if ! combined_mgs_mds ; then
 +              umount_mgs_client
 +      fi
  }
  run_test 406 "DNE support fs default striping"
  
@@@ -250,6 -250,7 +250,7 @@@ init_test_env() 
        export SGPDDSURVEY=${SGPDDSURVEY:-"$LUSTRE/../lustre-iokit/sgpdd-survey/sgpdd-survey")}
        [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey)
        export MCREATE=${MCREATE:-mcreate}
+       export MULTIOP=${MULTIOP:-multiop}
        # Ubuntu, at least, has a truncate command in /usr/bin
        # so fully path our truncate command.
        export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate}
@@@ -1864,19 -1865,6 +1865,19 @@@ zconf_umount() 
      fi
  }
  
 +# Mount the file system on the MGS
 +mount_mgs_client() {
 +      do_facet mgs "mkdir -p $MOUNT"
 +      zconf_mount $mgs_HOST $MOUNT $MOUNT_OPTS ||
 +              error "unable to mount $MOUNT on MGS"
 +}
 +
 +# Unmount the file system on the MGS
 +umount_mgs_client() {
 +      zconf_umount $mgs_HOST $MOUNT
 +      do_facet mgs "rm -rf $MOUNT"
 +}
 +
  # nodes is comma list
  sanity_mount_check_nodes () {
      local nodes=$1
@@@ -4870,19 -4858,17 +4871,19 @@@ wait_for_function () 
  }
  
  check_network() {
 -    local host=$1
 -    local max=$2
 -    local sleep=${3:-5}
 +      local host=$1
 +      local max=$2
 +      local sleep=${3:-5}
  
 -    echo `date +"%H:%M:%S (%s)"` waiting for $host network $max secs ...
 -    if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
 -        echo "Network not available!"
 -        exit 1
 -    fi
 +      [ "$host" = "$HOSTNAME" ] && return 0
 +
 +      echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..."
 +      if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
 +              echo "Network not available!"
 +              exit 1
 +      fi
  
 -    echo `date +"%H:%M:%S (%s)"` network interface is UP
 +      echo "$(date +'%H:%M:%S (%s)') network interface is UP"
  }
  
  no_dsh() {
@@@ -5288,29 -5274,6 +5289,29 @@@ report_error() 
  # Test interface
  ##################################
  
 +# usage: stack_trap arg sigspec
 +#
 +# stack_trap() behaves like bash's built-in trap, except that it "stacks" the
 +# command ``arg`` on top of previously defined commands for ``sigspec`` instead
 +# of overwriting them.
 +# stacked traps are executed in reverse order of their registration
 +#
 +# arg and sigspec have the same meaning as in man (1) trap
 +stack_trap()
 +{
 +      local arg="$1"
 +      local sigspec="$2"
 +
 +      local cmd="$(trap -p $sigspec)"
 +
 +      cmd="${cmd#trap -- \'}"
 +      cmd="${cmd%\'*}"
 +      [ -n "$cmd" ] && cmd="; $cmd"
 +      cmd="${arg}$cmd"
 +
 +      trap "$cmd" $sigspec
 +}
 +
  error_noexit() {
        report_error "$@"
  }
@@@ -6357,7 -6320,7 +6358,7 @@@ convert_facet2label() 
  }
  
  get_clientosc_proc_path() {
-       echo "${1}-osc-*"
+       echo "${1}-osc-ffff*"
  }
  
  # If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names
@@@ -6382,7 -6345,10 +6383,7 @@@ get_mdtosc_proc_path() 
        local mdt_label=$(convert_facet2label $mds_facet)
        local mdt_index=$(echo $mdt_label | sed -e 's/^.*-//')
  
 -      if [ $(lustre_version_code $mds_facet) -le $(version_code 1.8.0) ] ||
 -         mds_on_old_device $mds_facet; then
 -              echo "${ost_label}-osc"
 -      elif [[ $ost_label = *OST* ]]; then
 +      if [[ $ost_label = *OST* ]]; then
                echo "${ost_label}-osc-${mdt_index}"
        else
                echo "${ost_label}-osp-${mdt_index}"
diff --combined lustre/utils/Makefile.am
@@@ -51,10 -51,7 +51,10 @@@ endi
  noinst_LIBRARIES = liblustreapitmp.a
  endif # UTILS
  
 -lctl_SOURCES = lustre_lfsck.c portals.c debug.c obd.c lustre_cfg.c lctl.c obdctl.h lsnapshot.c
 +lctl_SOURCES = portals.c debug.c obd.c lustre_cfg.c lctl.c obdctl.h
 +if SERVER
 +lctl_SOURCES += lustre_lfsck.c lsnapshot.c
 +endif
  lctl_LDADD :=  liblustreapi.a $(LIBCFS) $(LIBREADLINE) $(PTHREAD_LIBS)
  lctl_DEPENDENCIES := $(LIBCFS) liblustreapi.a
  
@@@ -92,6 -89,7 +92,7 @@@ liblustreapitmp_a_SOURCES = liblustreap
                            liblustreapi_json.c liblustreapi_layout.c \
                            liblustreapi_lease.c liblustreapi_util.c \
                            liblustreapi_kernelconn.c liblustreapi_param.c \
+                           liblustreapi_mirror.c \
                            $(top_builddir)/libcfs/libcfs/util/string.c \
                            $(top_builddir)/libcfs/libcfs/util/param.c \
                            liblustreapi_ladvise.c liblustreapi_chlg.c
diff --combined lustre/utils/lfs.c
@@@ -73,7 -73,6 +73,6 @@@
  #endif /* !ARRAY_SIZE */
  
  /* all functions */
- static int lfs_setstripe(int argc, char **argv);
  static int lfs_find(int argc, char **argv);
  static int lfs_getstripe(int argc, char **argv);
  static int lfs_getdirstripe(int argc, char **argv);
@@@ -109,7 -108,35 +108,35 @@@ static int lfs_hsm_cancel(int argc, cha
  static int lfs_swap_layouts(int argc, char **argv);
  static int lfs_mv(int argc, char **argv);
  static int lfs_ladvise(int argc, char **argv);
+ static int lfs_mirror(int argc, char **argv);
+ static int lfs_mirror_list_commands(int argc, char **argv);
  static int lfs_list_commands(int argc, char **argv);
+ static inline int lfs_mirror_resync(int argc, char **argv);
+ enum setstripe_origin {
+       SO_SETSTRIPE,
+       SO_MIGRATE,
+       SO_MIRROR_CREATE,
+       SO_MIRROR_EXTEND
+ };
+ static int lfs_setstripe0(int argc, char **argv, enum setstripe_origin opc);
+ static inline int lfs_setstripe(int argc, char **argv)
+ {
+       return lfs_setstripe0(argc, argv, SO_SETSTRIPE);
+ }
+ static inline int lfs_setstripe_migrate(int argc, char **argv)
+ {
+       return lfs_setstripe0(argc, argv, SO_MIGRATE);
+ }
+ static inline int lfs_mirror_create(int argc, char **argv)
+ {
+       return lfs_setstripe0(argc, argv, SO_MIRROR_CREATE);
+ }
+ static inline int lfs_mirror_extend(int argc, char **argv)
+ {
+       return lfs_setstripe0(argc, argv, SO_MIRROR_EXTEND);
+ }
  
  /* Setstripe and migrate share mostly the same parameters */
  #define SSM_CMD_COMMON(cmd) \
        "\t              respectively, -1 for EOF). Must be a multiple of\n"\
        "\t              stripe_size.\n"
  
+ #define MIRROR_CREATE_HELP                                                   \
+       "\tmirror_count: Number of mirrors to be created with the upcoming\n"  \
+       "\t              setstripe layout options\n"                           \
+       "\t              It defaults to 1 if not specified; if specified,\n"   \
+       "\t              it must follow the option without a space.\n"         \
+       "\t              The option can also be repeated multiple times to\n"  \
+       "\t              separate mirrors that have different layouts.\n"      \
+       "\tsetstripe options: Mirror layout\n"                                 \
+       "\t              It can be a plain layout or a composite layout.\n"    \
+       "\t              If not specified, the stripe options inherited\n"     \
+       "\t              from the previous component will be used.\n"          \
+       "\tparent:       Use default stripe options from parent directory\n"
+ #define MIRROR_EXTEND_HELP                                                   \
+       MIRROR_CREATE_HELP                                                     \
+       "\tvictim_file:  The layout of victim_file will be split and used\n"   \
+       "\t              as a mirror added to the mirrored file.\n"            \
+       "\tno-verify:    This option indicates not to verify the mirror(s)\n"  \
+       "\t              from victim file(s) in case the victim file(s)\n"     \
+       "\t              contains the same data as the original mirrored\n"    \
+       "\t              file.\n"
+ #define MIRROR_EXTEND_USAGE                                                  \
+       "                 <--mirror-count|-N[mirror_count]>\n"                 \
+       "                 [setstripe options|--parent|-f <victim_file>]\n"     \
+       "                 [--no-verify]\n"
+ #define SETSTRIPE_USAGE                                                       \
+       SSM_CMD_COMMON("setstripe")                                     \
+       MIRROR_EXTEND_USAGE                                             \
+       "                 <directory|filename>\n"                       \
+       SSM_HELP_COMMON                                                 \
+       MIRROR_EXTEND_HELP
  
  #define MIGRATE_USAGE                                                 \
        SSM_CMD_COMMON("migrate  ")                                     \
        "\tmode: the mode of the directory\n"
  
  static const char     *progname;
- static bool            file_lease_supported = true;
+ /**
+  * command_t mirror_cmdlist - lfs mirror commands.
+  */
+ command_t mirror_cmdlist[] = {
+       { .pc_name = "create", .pc_func = lfs_mirror_create,
+         .pc_help = "Create a mirrored file.\n"
+               "usage: lfs mirror create "
+               "<--mirror-count|-N[mirror_count]> "
+               "[setstripe options|--parent] ... <filename|directory>\n"
+         MIRROR_CREATE_HELP },
+       { .pc_name = "extend", .pc_func = lfs_mirror_extend,
+         .pc_help = "Extend a mirrored file.\n"
+               "usage: lfs mirror extend "
+               "<--mirror-count|-N[mirror_count]> [--no-verify] "
+               "[setstripe options|--parent|-f <victim_file>] ... <filename>\n"
+         MIRROR_EXTEND_HELP },
+       { .pc_name = "resync", .pc_func = lfs_mirror_resync,
+         .pc_help = "Resynchronizes out-of-sync mirrored file(s).\n"
+               "usage: lfs mirror resync [--only <mirror_id[,...]>] "
+               "<mirrored file> [<mirrored file2>...]\n"},
+       { .pc_name = "--list-commands", .pc_func = lfs_mirror_list_commands,
+         .pc_help = "list commands supported by lfs mirror"},
+       { .pc_name = "help", .pc_func = Parser_help, .pc_help = "help" },
+       { .pc_name = "exit", .pc_func = Parser_quit, .pc_help = "quit" },
+       { .pc_name = "quit", .pc_func = Parser_quit, .pc_help = "quit" },
+       { .pc_help = NULL }
+ };
  
  /* all available commands */
  command_t cmdlist[] = {
         "usage: hsm_release [--filelist FILELIST] [--data DATA] <file> ..."},
        {"hsm_remove", lfs_hsm_remove, 0,
         "Remove file copy from external storage.\n"
 -       "usage: hsm_remove [--filelist FILELIST] [--data DATA]\n"
 -       "                  [--mntpath MOUNTPATH] [--archive NUM] <file|FID> ...\n"
 +       "usage: hsm_remove [--filelist FILELIST] [--data DATA] "
 +       "[--archive NUM]\n"
 +       "                  (FILE [FILE ...] | "
 +       "--mntpath MOUNTPATH FID [FID ...])\n"
         "\n"
 -       "Note: To remove files from the archive that have been deleted on\n"
 -       "Lustre, set mntpath and optionally archive. In that case, all the\n"
 -       "positional arguments and entries in the file list must be FIDs."
 +       "Note: To remove an archived copy of a file already deleted from a "
 +       "Lustre FS, the\n"
 +       "--mntpath option and a list of FIDs must be specified"
        },
        {"hsm_cancel", lfs_hsm_cancel, 0,
         "Cancel requests related to specified files.\n"
         "usage: hsm_cancel [--filelist FILELIST] [--data DATA] <file> ..."},
        {"swap_layouts", lfs_swap_layouts, 0, "Swap layouts between 2 files.\n"
         "usage: swap_layouts <path1> <path2>"},
-       {"migrate", lfs_setstripe, 0,
+       {"migrate", lfs_setstripe_migrate, 0,
         "migrate a directory between MDTs.\n"
         "usage: migrate --mdt-index <mdt_idx> [--verbose|-v] "
         "<directory>\n"
         "               {[--end|-e END[kMGT]] | [--length|-l LENGTH[kMGT]]}\n"
         "               {[--mode|-m [READ,WRITE]}\n"
         "               <file> ...\n"},
+       {"mirror", lfs_mirror, mirror_cmdlist,
+        "lfs commands used to manage files with mirrored components:\n"
+        "lfs mirror create - create a mirrored file or directory\n"
+        "lfs mirror extend - add mirror(s) to an existing file\n"
+        "lfs mirror split  - split a mirror from an existing mirrored file\n"
+        "lfs mirror resync - resynchronize an out-of-sync mirrored file\n"
+        "lfs mirror verify - verify a mirrored file\n"},
        {"help", Parser_help, 0, "help"},
        {"exit", Parser_quit, 0, "quit"},
        {"quit", Parser_quit, 0, "quit"},
  };
  
  
- #define MIGRATION_NONBLOCK    1
  static int check_hashtype(const char *hashtype)
  {
        int i;
        return 0;
  }
  
- /**
-  * Internal helper for migrate_copy_data(). Check lease and report error if
-  * need be.
-  *
-  * \param[in]  fd           File descriptor on which to check the lease.
-  * \param[out] lease_broken Set to true if the lease was broken.
-  * \param[in]  group_locked Whether a group lock was taken or not.
-  * \param[in]  path         Name of the file being processed, for error
-  *                        reporting
-  *
-  * \retval 0       Migration can keep on going.
-  * \retval -errno  Error occurred, abort migration.
-  */
- static int check_lease(int fd, bool *lease_broken, bool group_locked,
-                      const char *path)
+ static const char *error_loc = "syserror";
+ enum {
+       MIGRATION_NONBLOCK      = 1 << 0,
+       MIGRATION_MIRROR        = 1 << 1,
+ };
+ static int lfs_component_create(char *fname, int open_flags, mode_t open_mode,
+                               struct llapi_layout *layout);
+ static int
+ migrate_open_files(const char *name, const struct llapi_stripe_param *param,
+                  struct llapi_layout *layout, int *fd_src, int *fd_tgt)
  {
-       int rc;
+       int                      fd = -1;
+       int                      fdv = -1;
+       int                      mdt_index;
+       int                      random_value;
+       char                     parent[PATH_MAX];
+       char                     volatile_file[PATH_MAX];
+       char                    *ptr;
+       int                      rc;
+       struct stat              st;
+       struct stat              stv;
  
-       if (!file_lease_supported)
-               return 0;
+       if (param == NULL && layout == NULL) {
+               error_loc = "layout information";
+               return -EINVAL;
+       }
  
-       rc = llapi_lease_check(fd);
-       if (rc > 0)
-               return 0; /* llapi_check_lease returns > 0 on success. */
+       /* search for file directory pathname */
+       if (strlen(name) > sizeof(parent) - 1) {
+               error_loc = "source file name";
+               return -ERANGE;
+       }
  
-       if (!group_locked) {
-               fprintf(stderr, "%s: cannot migrate '%s': file busy\n",
-                       progname, path);
-               rc = rc ? rc : -EAGAIN;
+       strncpy(parent, name, sizeof(parent));
+       ptr = strrchr(parent, '/');
+       if (ptr == NULL) {
+               if (getcwd(parent, sizeof(parent)) == NULL) {
+                       error_loc = "getcwd";
+                       return -errno;
+               }
        } else {
-               fprintf(stderr, "%s: external attempt to access file '%s' "
-                       "blocked until migration ends.\n", progname, path);
-               rc = 0;
+               if (ptr == parent) /* leading '/' */
+                       ptr = parent + 1;
+               *ptr = '\0';
+       }
+       /* open file, direct io */
+       /* even if the file is only read, WR mode is needed to allow
+        * layout swap on fd */
+       fd = open(name, O_RDWR | O_DIRECT);
+       if (fd < 0) {
+               rc = -errno;
+               error_loc = "cannot open source file";
+               return rc;
+       }
+       rc = llapi_file_fget_mdtidx(fd, &mdt_index);
+       if (rc < 0) {
+               error_loc = "cannot get MDT index";
+               goto out;
+       }
+       do {
+               int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW;
+               mode_t open_mode = S_IRUSR | S_IWUSR;
+               random_value = random();
+               rc = snprintf(volatile_file, sizeof(volatile_file),
+                             "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
+                             mdt_index, random_value);
+               if (rc >= sizeof(volatile_file)) {
+                       rc = -ENAMETOOLONG;
+                       break;
+               }
+               /* create, open a volatile file, use caching (ie no directio) */
+               if (param != NULL)
+                       fdv = llapi_file_open_param(volatile_file, open_flags,
+                                                   open_mode, param);
+               else
+                       fdv = lfs_component_create(volatile_file, open_flags,
+                                                  open_mode, layout);
+       } while (fdv < 0 && (rc = fdv) == -EEXIST);
+       if (rc < 0) {
+               error_loc = "cannot create volatile file";
+               goto out;
+       }
+       /* In case the MDT does not support creation of volatile files
+        * we should try to unlink it. */
+       (void)unlink(volatile_file);
+       /* Not-owner (root?) special case.
+        * Need to set owner/group of volatile file like original.
+        * This will allow to pass related check during layout_swap.
+        */
+       rc = fstat(fd, &st);
+       if (rc != 0) {
+               rc = -errno;
+               error_loc = "cannot stat source file";
+               goto out;
+       }
+       rc = fstat(fdv, &stv);
+       if (rc != 0) {
+               rc = -errno;
+               error_loc = "cannot stat volatile";
+               goto out;
+       }
+       if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
+               rc = fchown(fdv, st.st_uid, st.st_gid);
+               if (rc != 0) {
+                       rc = -errno;
+                       error_loc = "cannot change ownwership of volatile";
+                       goto out;
+               }
+       }
+ out:
+       if (rc < 0) {
+               if (fd > 0)
+                       close(fd);
+               if (fdv > 0)
+                       close(fdv);
+       } else {
+               *fd_src = fd;
+               *fd_tgt = fdv;
+               error_loc = NULL;
        }
        return rc;
  }
  
- static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size,
-                            bool group_locked, const char *fname)
+ static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int))
  {
+       struct llapi_layout *layout;
+       size_t   buf_size = 4 * 1024 * 1024;
        void    *buf = NULL;
        ssize_t  rsize = -1;
        ssize_t  wsize = 0;
        size_t   wpos = 0;
        off_t    bufoff = 0;
        int      rc;
-       bool     lease_broken = false;
+       layout = llapi_layout_get_by_fd(fd_src, 0);
+       if (layout != NULL) {
+               uint64_t stripe_size;
+               rc = llapi_layout_stripe_size_get(layout, &stripe_size);
+               if (rc == 0)
+                       buf_size = stripe_size;
+               llapi_layout_free(layout);
+       }
  
        /* Use a page-aligned buffer for direct I/O */
        rc = posix_memalign(&buf, getpagesize(), buf_size);
                /* read new data only if we have written all
                 * previously read data */
                if (wpos == rpos) {
-                       if (!lease_broken) {
-                               rc = check_lease(fd_src, &lease_broken,
-                                                group_locked, fname);
+                       if (check_file) {
+                               rc = check_file(fd_src);
                                if (rc < 0)
-                                       goto out;
+                                       break;
                        }
                        rsize = read(fd_src, buf, buf_size);
                        if (rsize < 0) {
                                rc = -errno;
-                               fprintf(stderr, "%s: %s: read failed: %s\n",
-                                       progname, fname, strerror(-rc));
-                               goto out;
+                               break;
                        }
                        rpos += rsize;
                        bufoff = 0;
                wsize = write(fd_dst, buf + bufoff, rpos - wpos);
                if (wsize < 0) {
                        rc = -errno;
-                       fprintf(stderr,
-                               "%s: %s: write failed on volatile: %s\n",
-                               progname, fname, strerror(-rc));
-                       goto out;
+                       break;
                }
                wpos += wsize;
                bufoff += wsize;
        }
  
-       rc = fsync(fd_dst);
-       if (rc < 0) {
-               rc = -errno;
-               fprintf(stderr, "%s: %s: fsync failed: %s\n",
-                       progname, fname, strerror(-rc));
+       if (rc == 0) {
+               rc = fsync(fd_dst);
+               if (rc < 0)
+                       rc = -errno;
        }
  
- out:
        free(buf);
        return rc;
  }
  
- static int migrate_copy_timestamps(int fdv, const struct stat *st)
+ static int migrate_copy_timestamps(int fd, int fdv)
  {
-       struct timeval  tv[2] = {
-               {.tv_sec = st->st_atime},
-               {.tv_sec = st->st_mtime}
-       };
+       struct stat st;
  
-       return futimes(fdv, tv);
+       if (fstat(fd, &st) == 0) {
+               struct timeval tv[2] = {
+                       {.tv_sec = st.st_atime},
+                       {.tv_sec = st.st_mtime}
+               };
+               return futimes(fdv, tv);
+       }
+       return -errno;
  }
  
- static int migrate_block(int fd, int fdv, const struct stat *st,
-                        size_t buf_size, const char *name)
+ static int migrate_block(int fd, int fdv)
  {
        __u64   dv1;
        int     gid;
  
        rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get dataversion: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get dataversion";
                return rc;
        }
  
         * block it too. */
        rc = llapi_group_lock(fd, gid);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get group lock: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get group lock";
                return rc;
        }
  
-       rc = migrate_copy_data(fd, fdv, buf_size, true, name);
+       rc = migrate_copy_data(fd, fdv, NULL);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
+               error_loc = "data copy failed";
                goto out_unlock;
        }
  
        /* Make sure we keep original atime/mtime values */
-       rc = migrate_copy_timestamps(fdv, st);
+       rc = migrate_copy_timestamps(fd, fdv);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: timestamp copy failed\n",
-                       progname, name);
+               error_loc = "timestamp copy failed";
                goto out_unlock;
        }
  
        rc = llapi_fswap_layouts_grouplock(fd, fdv, dv1, 0, 0,
                                           SWAP_LAYOUTS_CHECK_DV1);
        if (rc == -EAGAIN) {
-               fprintf(stderr, "%s: %s: dataversion changed during copy, "
-                       "migration aborted\n", progname, name);
+               error_loc = "file changed";
                goto out_unlock;
        } else if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot swap layouts: %s\n", progname,
-                       name, strerror(-rc));
+               error_loc = "cannot swap layout";
                goto out_unlock;
        }
  
  out_unlock:
        rc2 = llapi_group_unlock(fd, gid);
        if (rc2 < 0 && rc == 0) {
-               fprintf(stderr, "%s: %s: putting group lock failed: %s\n",
-                       progname, name, strerror(-rc2));
+               error_loc = "unlock group lock";
                rc = rc2;
        }
  
        return rc;
  }
  
- static int migrate_nonblock(int fd, int fdv, const struct stat *st,
-                           size_t buf_size, const char *name)
+ /**
+  * Internal helper for migrate_copy_data(). Check lease and report error if
+  * need be.
+  *
+  * \param[in]  fd           File descriptor on which to check the lease.
+  *
+  * \retval 0       Migration can keep on going.
+  * \retval -errno  Error occurred, abort migration.
+  */
+ static int check_lease(int fd)
+ {
+       int rc;
+       rc = llapi_lease_check(fd);
+       if (rc > 0)
+               return 0; /* llapi_lease_check returns > 0 on success. */
+       return -EBUSY;
+ }
+ static int migrate_nonblock(int fd, int fdv)
  {
        __u64   dv1;
        __u64   dv2;
  
        rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get data version: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get data version";
                return rc;
        }
  
-       rc = migrate_copy_data(fd, fdv, buf_size, false, name);
+       rc = migrate_copy_data(fd, fdv, check_lease);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
+               error_loc = "data copy failed";
                return rc;
        }
  
        rc = llapi_get_data_version(fd, &dv2, LL_DV_RD_FLUSH);
        if (rc != 0) {
-               fprintf(stderr, "%s: %s: cannot get data version: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "cannot get data version";
                return rc;
        }
  
        if (dv1 != dv2) {
                rc = -EAGAIN;
-               fprintf(stderr, "%s: %s: data version changed during "
-                               "migration\n",
-                       progname, name);
+               error_loc = "source file changed";
                return rc;
        }
  
        /* Make sure we keep original atime/mtime values */
-       rc = migrate_copy_timestamps(fdv, st);
-       if (rc < 0) {
-               fprintf(stderr, "%s: %s: timestamp copy failed\n",
-                       progname, name);
-               return rc;
-       }
-       /* Atomically put lease, swap layouts and close.
-        * for a migration we need to check data version on file did
-        * not change. */
-       rc = llapi_fswap_layouts(fd, fdv, 0, 0, SWAP_LAYOUTS_CLOSE);
+       rc = migrate_copy_timestamps(fd, fdv);
        if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot swap layouts: %s\n",
-                       progname, name, strerror(-rc));
+               error_loc = "timestamp copy failed";
                return rc;
        }
  
@@@ -681,22 -878,18 +880,22 @@@ static int lfs_component_del(char *fnam
        /* LCME_FL_INIT is the only supported flag in PFL */
        if (flags != 0) {
                if (flags & ~LCME_KNOWN_FLAGS) {
 -                      fprintf(stderr, "Invalid component flags %#x\n", flags);
 +                      fprintf(stderr,
 +                              "%s setstripe: bad component flags %#x\n",
 +                              progname, flags);
                        return -EINVAL;
                }
        } else if (comp_id > LCME_ID_MAX) {
 -              fprintf(stderr, "Invalid component id %u\n", comp_id);
 +              fprintf(stderr, "%s setstripe: bad component id %u\n",
 +                      progname, comp_id);
                return -EINVAL;
        }
  
        rc = llapi_layout_file_comp_del(fname, comp_id, flags);
        if (rc)
 -              fprintf(stderr, "Delete component %#x from %s failed. %s\n",
 -                      comp_id, fname, strerror(errno));
 +              fprintf(stderr,
 +                      "%s setstripe: cannot delete component %#x from '%s': %s\n",
 +                      progname, comp_id, fname, strerror(errno));
        return rc;
  }
  
@@@ -741,189 -934,436 +940,436 @@@ static int lfs_migrate(char *name, __u6
                       struct llapi_stripe_param *param,
                       struct llapi_layout *layout)
  {
-       int                      fd = -1;
-       int                      fdv = -1;
-       char                     parent[PATH_MAX];
-       int                      mdt_index;
-       int                      random_value;
-       char                     volatile_file[sizeof(parent) +
-                                              LUSTRE_VOLATILE_HDR_LEN +
-                                              2 * sizeof(mdt_index) +
-                                              2 * sizeof(random_value) + 4];
-       char                    *ptr;
-       int                      rc;
-       struct lov_user_md      *lum = NULL;
-       int                      lum_size;
-       int                      buf_size = 1024 * 1024 * 4;
-       bool                     have_lease_rdlck = false;
-       struct stat              st;
-       struct stat              stv;
+       int fd = -1;
+       int fdv = -1;
+       int rc;
+       rc = migrate_open_files(name, param, layout, &fd, &fdv);
+       if (rc < 0)
+               goto out;
  
-       /* find the right size for the IO and allocate the buffer */
-       lum_size = lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
-       lum = malloc(lum_size);
-       if (lum == NULL) {
-               rc = -ENOMEM;
-               goto free;
+       if (!(migration_flags & MIGRATION_NONBLOCK)) {
+               /* Blocking mode (forced if servers do not support file lease).
+                * It is also the default mode, since we cannot distinguish
+                * between a broken lease and a server that does not support
+                * atomic swap/close (LU-6785) */
+               rc = migrate_block(fd, fdv);
+               goto out;
        }
  
-       rc = llapi_file_get_stripe(name, lum);
-       /* failure can happen for many reasons and some may be not real errors
-        * (eg: no stripe)
-        * in case of a real error, a later call will fail with better
-        * error management */
-       if (rc == 0) {
-               if ((lum->lmm_magic == LOV_USER_MAGIC_V1 ||
-                    lum->lmm_magic == LOV_USER_MAGIC_V3) &&
-                   lum->lmm_stripe_size != 0)
-                       buf_size = lum->lmm_stripe_size;
+       rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
+       if (rc < 0) {
+               error_loc = "cannot get lease";
+               goto out;
        }
  
-       /* open file, direct io */
-       /* even if the file is only read, WR mode is nedeed to allow
-        * layout swap on fd */
-       fd = open(name, O_RDWR | O_DIRECT);
-       if (fd == -1) {
-               rc = -errno;
-               fprintf(stderr, "%s: cannot open '%s': %s\n", progname, name,
-                       strerror(-rc));
-               goto free;
-       }
-       if (file_lease_supported) {
-               rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
-               if (rc == -EOPNOTSUPP) {
-                       /* Older servers do not support file lease.
-                        * Disable related checks. This opens race conditions
-                        * as explained in LU-4840 */
-                       file_lease_supported = false;
-               } else if (rc < 0) {
-                       fprintf(stderr, "%s: %s: cannot get open lease: %s\n",
-                               progname, name, strerror(-rc));
-                       goto error;
+       rc = migrate_nonblock(fd, fdv);
+       if (rc < 0) {
+               llapi_lease_put(fd);
+               goto out;
+       }
+       /* Atomically put lease, swap layouts and close.
+        * for a migration we need to check data version on file did
+        * not change. */
+       rc = llapi_fswap_layouts(fd, fdv, 0, 0,
+                                migration_flags & MIGRATION_MIRROR ?
+                                MERGE_LAYOUTS_CLOSE : SWAP_LAYOUTS_CLOSE);
+       if (rc < 0) {
+               error_loc = "cannot swap layout";
+               goto out;
+       }
+ out:
+       if (fd >= 0)
+               close(fd);
+       if (fdv >= 0)
+               close(fdv);
+       if (rc < 0)
+               fprintf(stderr, "error: %s: %s: %s: %s\n",
+                       progname, name, error_loc, strerror(-rc));
+       return rc;
+ }
+ /**
+  * struct mirror_args - Command-line arguments for mirror(s).
+  * @m_count:  Number of mirrors to be created with this layout.
+  * @m_layout: Mirror layout. Released by lfs_mirror_free() together with
+  *            the node itself.
+  * @m_file:   A victim file. Its layout will be split and used as a mirror.
+  *            The string is not freed by lfs_mirror_free().
+  * @m_next:   Pointer to the next node of the list, NULL at the tail.
+  *
+  * Command-line arguments for mirror(s) will be parsed and stored in
+  * a linked list that consists of this structure.
+  */
+ struct mirror_args {
+       __u32                   m_count;
+       struct llapi_layout     *m_layout;
+       const char              *m_file;
+       struct mirror_args      *m_next;
+ };
+ /**
+  * mirror_sanity_check_one() - Check that one layout can serve as a mirror.
+  * @layout: Layout to be checked.
+  *
+  * The first component selected in @layout must not use a Data-on-MDT
+  * pattern, and the extent of the last component must end at LUSTRE_EOF.
+  * The function selects the first and then the last component of @layout
+  * while checking.
+  *
+  * Return: 0 on success, -ENOTSUP for a Data-on-MDT pattern, -EINVAL if the
+  *         layout does not reach EOF, or -errno if a layout query fails.
+  */
+ static inline int mirror_sanity_check_one(struct llapi_layout *layout)
+ {
+       uint64_t ext_start;
+       uint64_t ext_end;
+       uint64_t pattern;
+       /* LU-10112: do not support dom+flr in phase 1 */
+       if (llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST) != 0)
+               return -errno;
+       if (llapi_layout_pattern_get(layout, &pattern) != 0)
+               return -errno;
+       switch (pattern) {
+       case LOV_PATTERN_MDT:
+       case LLAPI_LAYOUT_MDT:
+               fprintf(stderr, "error: %s: doesn't support dom+flr for now\n",
+                       progname);
+               return -ENOTSUP;
+       default:
+               break;
+       }
+       /* a mirror has to cover the whole file: look at the last extent */
+       if (llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_LAST) != 0)
+               return -errno;
+       if (llapi_layout_comp_extent_get(layout, &ext_start, &ext_end) != 0)
+               return -errno;
+       if (ext_end == LUSTRE_EOF)
+               return 0;
+       fprintf(stderr, "error: %s: mirror layout doesn't reach eof\n",
+               progname);
+       return -EINVAL;
+ }
+ /**
+  * enum mirror_flags - Flags for extending a mirrored file.
+  * @NO_VERIFY: Do not compare the contents of the victim file(s) with the
+  *             original mirrored file before merging them; meant for the
+  *             case where the victim file(s) are already known to hold
+  *             the same data.
+  *
+  * Flags for extending a mirrored file.
+  */
+ enum mirror_flags {
+       NO_VERIFY       = 0x1,
+ };
+ /**
+  * mirror_create_sanity_check() - Check mirror list.
+  * @fname: Optional path of an existing file; when not NULL, its current
+  *         layout is checked with mirror_sanity_check_one() first.
+  * @list:  A linked list that stores the mirror arguments.
+  *
+  * This function does a sanity check on @list for creating
+  * a mirrored file. For every node that names a victim file, m_layout
+  * is replaced by the layout read from that file. Victim files and
+  * explicit layouts must not be mixed in the same list.
+  *
+  * Return: 0 on success or a negative error code on failure.
+  */
+ static int mirror_create_sanity_check(const char *fname,
+                                     struct mirror_args *list)
+ {
+       int rc = 0;
+       bool has_m_file = false;
+       bool has_m_layout = false;
+       if (list == NULL)
+               return -EINVAL;
+       if (fname) {
+               struct llapi_layout *layout;
+               layout = llapi_layout_get_by_path(fname, 0);
+               if (!layout) {
+                       fprintf(stderr,
+                               "error: %s: file '%s' couldn't get layout\n",
+                               progname, fname);
+                       return -ENODATA;
+               }
+               rc = mirror_sanity_check_one(layout);
+               llapi_layout_free(layout);
+               if (rc)
+                       return rc;
+       }
+       while (list != NULL) {
+               if (list->m_file != NULL) {
+                       has_m_file = true;
+                       /* the victim file's own layout replaces m_layout */
+                       llapi_layout_free(list->m_layout);
+                       list->m_layout =
+                               llapi_layout_get_by_path(list->m_file, 0);
+                       if (list->m_layout == NULL) {
+                               fprintf(stderr,
+                                       "error: %s: file '%s' has no layout\n",
+                                       progname, list->m_file);
+                               return -ENODATA;
+                       }
                } else {
-                       have_lease_rdlck = true;
+                       if (list->m_layout != NULL)
+                               has_m_layout = true;
+                       else {
+                               fprintf(stderr, "error: %s: no mirror layout\n",
+                                       progname);
+                               return -EINVAL;
+                       }
                }
+               rc = mirror_sanity_check_one(list->m_layout);
+               if (rc)
+                       return rc;
+               list = list->m_next;
        }
  
-       /* search for file directory pathname */
-       if (strlen(name) > sizeof(parent)-1) {
-               rc = -E2BIG;
-               goto error;
+       if (has_m_file && has_m_layout) {
+               fprintf(stderr, "error: %s: -f <victim_file> option should not "
+                       "be specified with setstripe options or "
+                       "--parent option\n", progname);
+               return -EINVAL;
        }
-       strncpy(parent, name, sizeof(parent));
-       ptr = strrchr(parent, '/');
-       if (ptr == NULL) {
-               if (getcwd(parent, sizeof(parent)) == NULL) {
-                       rc = -errno;
-                       goto error;
+       return 0;
+ }
+ /**
+  * mirror_create() - Create a mirrored file.
+  * @fname:        The file to be created.
+  * @mirror_list:  A linked list that stores the mirror arguments.
+  *
+  * This function creates a mirrored file @fname with the mirror(s)
+  * from @mirror_list. The list is first validated with
+  * mirror_create_sanity_check(); each node's layout is then merged
+  * m_count times into one layout that is used to create @fname.
+  *
+  * Return: 0 on success or a negative error code on failure.
+  */
+ static int mirror_create(char *fname, struct mirror_args *mirror_list)
+ {
+       struct llapi_layout *layout = NULL;
+       struct mirror_args *cur_mirror = NULL;
+       uint16_t mirror_count = 0;
+       int i = 0;
+       int rc = 0;
+       rc = mirror_create_sanity_check(NULL, mirror_list);
+       if (rc)
+               return rc;
+       cur_mirror = mirror_list;
+       while (cur_mirror != NULL) {
+               /* merge this node's layout once per requested mirror */
+               for (i = 0; i < cur_mirror->m_count; i++) {
+                       rc = llapi_layout_merge(&layout, cur_mirror->m_layout);
+                       if (rc) {
+                               rc = -errno;
+                               fprintf(stderr, "error: %s: "
+                                       "merge layout failed: %s\n",
+                                       progname, strerror(errno));
+                               goto error;
+                       }
                }
-       } else {
-               if (ptr == parent)
-                       strcpy(parent, "/");
-               else
-                       *ptr = '\0';
+               mirror_count += cur_mirror->m_count;
+               cur_mirror = cur_mirror->m_next;
        }
  
-       rc = llapi_file_fget_mdtidx(fd, &mdt_index);
-       if (rc < 0) {
-               fprintf(stderr, "%s: %s: cannot get MDT index: %s\n",
-                       progname, name, strerror(-rc));
+       rc = llapi_layout_mirror_count_set(layout, mirror_count);
+       if (rc) {
+               rc = -errno;
+               fprintf(stderr, "error: %s: set mirror count failed: %s\n",
+                       progname, strerror(errno));
                goto error;
        }
  
-       do {
-               int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW;
-               mode_t open_mode = S_IRUSR | S_IWUSR;
+       rc = lfs_component_create(fname, O_CREAT | O_WRONLY, 0644,
+                                 layout);
+       /* a non-negative result is closed right away and reported as 0 */
+       if (rc >= 0) {
+               close(rc);
+               rc = 0;
+       }
  
-               random_value = random();
-               rc = snprintf(volatile_file, sizeof(volatile_file),
-                             "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
-                             mdt_index, random_value);
-               if (rc >= sizeof(volatile_file)) {
-                       rc = -E2BIG;
-                       goto error;
+ error:
+       llapi_layout_free(layout);
+       return rc;
+ }
+ /**
+  * mirror_file_compare() - Compare two files and check lease on @fd.
+  * @fd:  Descriptor of the mirrored file; llapi_lease_check() is called
+  *       on it before every chunk is read.
+  * @fdv: Descriptor of the victim file.
+  *
+  * Both files are read sequentially from their current offsets in 4M
+  * chunks. The comparison stops at the first chunk that differs, at a
+  * short read of @fdv, or when read() on @fd returns 0.
+  *
+  * Return: number of leading bytes that are the same, -ENOMEM if the
+  *         buffer cannot be allocated, -EBUSY if llapi_lease_check()
+  *         reports no lease on @fd, or -errno if reading either file fails.
+  */
+ static ssize_t mirror_file_compare(int fd, int fdv)
+ {
+       const size_t buflen = 4 * 1024 * 1024; /* 4M */
+       char *buf;
+       ssize_t bytes_done = 0;
+       ssize_t bytes_read = 0;
+       ssize_t bytes_cmp = 0;
+       /* one allocation, split in two halves: [0, buflen) for @fd and
+        * [buflen, 2 * buflen) for @fdv */
+       buf = malloc(buflen * 2);
+       if (!buf)
+               return -ENOMEM;
+       while (1) {
+               if (!llapi_lease_check(fd)) {
+                       bytes_done = -EBUSY;
+                       break;
                }
  
-               /* create, open a volatile file, use caching (ie no directio) */
-               if (param != NULL)
-                       fdv = llapi_file_open_param(volatile_file, open_flags,
-                                                   open_mode, param);
-               else if (layout != NULL)
-                       fdv = lfs_component_create(volatile_file, open_flags,
-                                                  open_mode, layout);
-               else
-                       fdv = -EINVAL;
-       } while (fdv == -EEXIST);
+               bytes_read = read(fd, buf, buflen);
+               if (bytes_read < 0) {
+                       /* a failed read is an I/O error, not a mismatch */
+                       bytes_done = -errno;
+                       break;
+               }
+               if (bytes_read == 0)
+                       break;
  
-       if (fdv < 0) {
-               rc = fdv;
-               fprintf(stderr, "%s: %s: cannot create volatile file in"
-                               " directory: %s\n",
-                       progname, parent, strerror(-rc));
-               goto error;
+               bytes_cmp = read(fdv, buf + buflen, buflen);
+               if (bytes_cmp < 0) {
+                       bytes_done = -errno;
+                       break;
+               }
+               if (bytes_cmp != bytes_read)
+                       break;
+               /* XXX: should compute the checksum on each buffer and then
+                * compare checksum to avoid cache collision */
+               if (memcmp(buf, buf + buflen, bytes_read))
+                       break;
+               bytes_done += bytes_read;
        }
  
-       /* In case the MDT does not support creation of volatile files
-        * we should try to unlink it. */
-       (void)unlink(volatile_file);
+       free(buf);
  
-       /* Not-owner (root?) special case.
-        * Need to set owner/group of volatile file like original.
-        * This will allow to pass related check during layout_swap.
-        */
-       rc = fstat(fd, &st);
-       if (rc != 0) {
+       return bytes_done;
+ }
+ /**
+  * mirror_extend_file() - Merge a victim file into @fname as a new mirror.
+  * @fname:        The mirrored file to be extended.
+  * @victim_file:  The file whose layout is merged into @fname; it is
+  *                unlinked when the merge succeeds.
+  * @mirror_flags: Flags from enum mirror_flags; with NO_VERIFY the
+  *                contents of the two files are not compared.
+  *
+  * Both files must have the same st_dev and the same size. A read lease
+  * is taken on @fname before the optional content comparison, and the
+  * layouts are merged with llapi_fswap_layouts(MERGE_LAYOUTS_CLOSE).
+  * Every failure goes through the common exit path so that both
+  * descriptors are closed and the error is reported.
+  *
+  * Return: 0 on success or a negative error code on failure.
+  */
+ static int mirror_extend_file(const char *fname, const char *victim_file,
+                             enum mirror_flags mirror_flags)
+ {
+       int fd = -1;
+       int fdv = -1;
+       struct stat stbuf;
+       struct stat stbuf_v;
+       __u64 dv;
+       int rc;
+       fd = open(fname, O_RDWR);
+       if (fd < 0) {
+               error_loc = "open source file";
                rc = -errno;
-               fprintf(stderr, "%s: %s: cannot stat: %s\n", progname, name,
-                       strerror(errno));
-               goto error;
+               goto out;
        }
-       rc = fstat(fdv, &stv);
-       if (rc != 0) {
+       fdv = open(victim_file, O_RDWR);
+       if (fdv < 0) {
+               error_loc = "open target file";
                rc = -errno;
-               fprintf(stderr, "%s: %s: cannot stat: %s\n", progname,
-                       volatile_file, strerror(errno));
-               goto error;
+               goto out;
        }
-       if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
-               rc = fchown(fdv, st.st_uid, st.st_gid);
-               if (rc != 0) {
-                       rc = -errno;
-                       fprintf(stderr, "%s: %s: cannot chown: %s\n", progname,
-                               name, strerror(errno));
-                       goto error;
-               }
+       if (fstat(fd, &stbuf) || fstat(fdv, &stbuf_v)) {
+               error_loc = "stat source or target file";
+               rc = -errno;
+               goto out;
+       }
+       if (stbuf.st_dev != stbuf_v.st_dev) {
+               error_loc = "stat source and target file";
+               rc = -EXDEV;
+               goto out;
        }
  
-       if (migration_flags & MIGRATION_NONBLOCK && file_lease_supported) {
-               rc = migrate_nonblock(fd, fdv, &st, buf_size, name);
-               if (rc == 0) {
-                       have_lease_rdlck = false;
-                       fdv = -1; /* The volatile file is closed as we put the
-                                  * lease in non-blocking mode. */
+       /* mirrors should be of the same size */
+       if (stbuf.st_size != stbuf_v.st_size) {
+               error_loc = "file sizes don't match";
+               rc = -EINVAL;
+               goto out;
+       }
+       rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
+       if (rc < 0) {
+               error_loc = "cannot get lease";
+               goto out;
+       }
+       if (!(mirror_flags & NO_VERIFY)) {
+               ssize_t ret;
+               /* mirrors should have the same contents */
+               ret = mirror_file_compare(fd, fdv);
+               if (ret != stbuf.st_size) {
+                       error_loc = "file busy or contents don't match";
+                       rc = ret < 0 ? ret : -EINVAL;
+                       goto out;
                }
-       } else {
-               /* Blocking mode (forced if servers do not support file lease).
-                * It is also the default mode, since we cannot distinguish
-                * between a broken lease and a server that does not support
-                * atomic swap/close (LU-6785) */
-               rc = migrate_block(fd, fdv, &st, buf_size, name);
        }
  
- error:
-       if (have_lease_rdlck)
-               llapi_lease_put(fd);
+       /* Get rid of caching pages from clients */
+       rc = llapi_get_data_version(fd, &dv, LL_DV_WR_FLUSH);
+       if (rc < 0) {
+               error_loc = "cannot get data version";
+               /* leave through out: so fd and fdv are not leaked */
+               goto out;
+       }
+       rc = llapi_get_data_version(fdv, &dv, LL_DV_WR_FLUSH);
+       if (rc < 0) {
+               error_loc = "cannot get data version";
+               goto out;
+       }
+       /* Make sure we keep original atime/mtime values */
+       rc = migrate_copy_timestamps(fd, fdv);
+       if (rc < 0) {
+               error_loc = "timestamp copy failed";
+               goto out;
+       }
  
+       /* Atomically put lease, swap layouts and close.
+        * for a migration we need to check data version on file did
+        * not change. */
+       rc = llapi_fswap_layouts(fd, fdv, 0, 0, MERGE_LAYOUTS_CLOSE);
+       if (rc < 0) {
+               error_loc = "cannot swap layout";
+               goto out;
+       }
+ out:
        if (fd >= 0)
                close(fd);
  
        if (fdv >= 0)
                close(fdv);
  
- free:
-       if (lum)
-               free(lum);
+       /* the victim file is only removed once its layout was merged */
+       if (!rc)
+               (void) unlink(victim_file);
+       if (rc < 0)
+               fprintf(stderr, "error: %s: %s: %s: %s\n",
+                       progname, fname, error_loc, strerror(-rc));
+       return rc;
+ }
+ /**
+  * mirror_extend() - Add the mirror(s) of a list to an existing file.
+  * @fname:        The file to be extended.
+  * @mirror_list:  A linked list that stores the mirror arguments.
+  * @mirror_flags: Flags passed on to mirror_extend_file().
+  *
+  * After mirror_create_sanity_check() succeeds, every node is handled in
+  * order: a node with a victim file goes to mirror_extend_file(), any
+  * other node calls lfs_migrate() m_count times with its layout. The
+  * first failure stops the walk.
+  *
+  * Return: 0 on success or the first non-zero result that was hit.
+  */
+ static int mirror_extend(char *fname, struct mirror_args *mirror_list,
+                        enum mirror_flags mirror_flags)
+ {
+       struct mirror_args *node;
+       int rc = mirror_create_sanity_check(fname, mirror_list);
+       if (rc != 0)
+               return rc;
+       for (node = mirror_list; node != NULL; node = node->m_next) {
+               if (node->m_file == NULL) {
+                       __u32 remaining;
+                       for (remaining = node->m_count; remaining > 0;
+                            remaining--) {
+                               rc = lfs_migrate(fname,
+                                       MIGRATION_NONBLOCK | MIGRATION_MIRROR,
+                                       NULL, node->m_layout);
+                               if (rc != 0)
+                                       break;
+                       }
+               } else {
+                       rc = mirror_extend_file(fname, node->m_file,
+                                               mirror_flags);
+               }
+               if (rc != 0)
+                       break;
+       }
  
        return rc;
  }
@@@ -1015,11 -1455,11 +1461,11 @@@ static int parse_targets(__u32 *osts, i
+ /* Stripe options for one setstripe component.  setstripe_args_init()
+  * sets stripe size, count and offset to LLAPI_LAYOUT_DEFAULT and the
+  * pattern to LLAPI_LAYOUT_RAID0; the remaining fields start zeroed. */
  struct lfs_setstripe_args {
        unsigned long long       lsa_comp_end;
        unsigned long long       lsa_stripe_size;
-       int                      lsa_stripe_count;
-       int                      lsa_stripe_off;
+       long long                lsa_stripe_count;
+       long long                lsa_stripe_off;
        __u32                    lsa_comp_flags;
        int                      lsa_nr_osts;
-       int                      lsa_pattern;
+       unsigned long long       lsa_pattern;
        __u32                   *lsa_osts;
        char                    *lsa_pool_name;
  };
+ /* Reset @lsa to its "nothing specified" state: the whole structure is
+  * zeroed, then stripe size, count and offset are set to
+  * LLAPI_LAYOUT_DEFAULT and the pattern to LLAPI_LAYOUT_RAID0. */
  static inline void setstripe_args_init(struct lfs_setstripe_args *lsa)
  {
        memset(lsa, 0, sizeof(*lsa));
-       lsa->lsa_stripe_off = -1;
+       lsa->lsa_stripe_size = LLAPI_LAYOUT_DEFAULT;
+       lsa->lsa_stripe_count = LLAPI_LAYOUT_DEFAULT;
+       lsa->lsa_stripe_off = LLAPI_LAYOUT_DEFAULT;
+       lsa->lsa_pattern = LLAPI_LAYOUT_RAID0;
+       lsa->lsa_pool_name = NULL;
+ }
+ /**
+  * setstripe_args_init_inherit() - Re-initialize stripe options, keeping
+  *                                 the inheritable ones.
+  * @lsa: Stripe options to be initialized and inherited.
+  *
+  * Every option in @lsa is reset through setstripe_args_init() except
+  * stripe_size, stripe_count and the OST pool_name, which keep the
+  * values they had on entry.
+  *
+  * Return: void.
+  */
+ static inline void setstripe_args_init_inherit(struct lfs_setstripe_args *lsa)
+ {
+       /* remember the inherited options before everything is reset */
+       unsigned long long kept_size = lsa->lsa_stripe_size;
+       long long kept_count = lsa->lsa_stripe_count;
+       char *kept_pool = lsa->lsa_pool_name;
+       setstripe_args_init(lsa);
+       lsa->lsa_pool_name = kept_pool;
+       lsa->lsa_stripe_count = kept_count;
+       lsa->lsa_stripe_size = kept_size;
  }
  
+ /* Return true if any option in @lsa differs from the values that
+  * setstripe_args_init() assigns, i.e. if an option has been set. */
  static inline bool setstripe_args_specified(struct lfs_setstripe_args *lsa)
  {
-       return (lsa->lsa_stripe_size != 0 || lsa->lsa_stripe_count != 0 ||
-               lsa->lsa_stripe_off != -1 || lsa->lsa_pool_name != NULL ||
-               lsa->lsa_comp_end != 0 || lsa->lsa_pattern != 0);
+       return (lsa->lsa_stripe_size != LLAPI_LAYOUT_DEFAULT ||
+               lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT ||
+               lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT ||
+               lsa->lsa_pattern != LLAPI_LAYOUT_RAID0 ||
+               lsa->lsa_pool_name != NULL ||
+               lsa->lsa_comp_end != 0);
  }
  
+ /**
+  * comp_args_to_layout() - Create or extend a composite layout.
+  * @composite:       Pointer to the composite layout.
+  * @lsa:             Stripe options for the new component.
+  *
+  * This function creates or extends a composite layout by adding a new
+  * component with stripe options from @lsa.
+  *
+  * Return: 0 on success or an error code on failure.
+  */
  static int comp_args_to_layout(struct llapi_layout **composite,
                               struct lfs_setstripe_args *lsa)
  {
        if (lsa->lsa_pattern == LLAPI_LAYOUT_MDT) {
                /* In case of Data-on-MDT patterns the only extra option
                 * applicable is stripe size option. */
-               if (lsa->lsa_stripe_count) {
+               if (lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT) {
                        fprintf(stderr, "Option 'stripe-count' can't be "
-                               "specified with Data-on-MDT component: %i\n",
+                               "specified with Data-on-MDT component: %lld\n",
                                lsa->lsa_stripe_count);
                        return -EINVAL;
                }
-               if (lsa->lsa_stripe_size) {
+               if (lsa->lsa_stripe_size != LLAPI_LAYOUT_DEFAULT) {
                        fprintf(stderr, "Option 'stripe-size' can't be "
                                "specified with Data-on-MDT component: %llu\n",
                                lsa->lsa_stripe_size);
                                lsa->lsa_nr_osts);
                        return -EINVAL;
                }
-               if (lsa->lsa_stripe_off != -1) {
+               if (lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT) {
                        fprintf(stderr, "Option 'stripe-offset' can't be "
-                               "specified with Data-on-MDT component: %i\n",
+                               "specified with Data-on-MDT component: %lld\n",
                                lsa->lsa_stripe_off);
                        return -EINVAL;
                }
  
                rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern);
                if (rc) {
-                       fprintf(stderr, "Set stripe pattern %#x failed. %s\n",
+                       fprintf(stderr, "Set stripe pattern %#llx failed. %s\n",
                                lsa->lsa_pattern, strerror(errno));
                        return rc;
                }
                lsa->lsa_stripe_size = lsa->lsa_comp_end;
        }
  
-       if (lsa->lsa_stripe_size != 0) {
-               rc = llapi_layout_stripe_size_set(layout,
-                                                 lsa->lsa_stripe_size);
-               if (rc) {
-                       fprintf(stderr, "Set stripe size %llu failed. %s\n",
-                               lsa->lsa_stripe_size, strerror(errno));
-                       return rc;
-               }
+       rc = llapi_layout_stripe_size_set(layout, lsa->lsa_stripe_size);
+       if (rc) {
+               fprintf(stderr, "Set stripe size %llu failed: %s\n",
+                       lsa->lsa_stripe_size, strerror(errno));
+               return rc;
        }
  
-       if (lsa->lsa_stripe_count != 0) {
-               rc = llapi_layout_stripe_count_set(layout,
-                                                  lsa->lsa_stripe_count == -1 ?
-                                                  LLAPI_LAYOUT_WIDE :
-                                                  lsa->lsa_stripe_count);
-               if (rc) {
-                       fprintf(stderr, "Set stripe count %d failed. %s\n",
-                               lsa->lsa_stripe_count, strerror(errno));
-                       return rc;
-               }
+       rc = llapi_layout_stripe_count_set(layout, lsa->lsa_stripe_count);
+       if (rc) {
+               fprintf(stderr, "Set stripe count %lld failed: %s\n",
+                       lsa->lsa_stripe_count, strerror(errno));
+               return rc;
        }
  
        if (lsa->lsa_pool_name != NULL) {
                                lsa->lsa_pool_name, strerror(errno));
                        return rc;
                }
+       } else {
+               rc = llapi_layout_pool_name_set(layout, "");
+               if (rc) {
+                       fprintf(stderr, "Clear pool name failed: %s\n",
+                               strerror(errno));
+                       return rc;
+               }
        }
  
        if (lsa->lsa_nr_osts > 0) {
                if (lsa->lsa_stripe_count > 0 &&
+                   lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT &&
+                   lsa->lsa_stripe_count != LLAPI_LAYOUT_WIDE &&
                    lsa->lsa_nr_osts != lsa->lsa_stripe_count) {
-                       fprintf(stderr, "stripe_count(%d) != nr_osts(%d)\n",
+                       fprintf(stderr, "stripe_count(%lld) != nr_osts(%d)\n",
                                lsa->lsa_stripe_count, lsa->lsa_nr_osts);
                        return -EINVAL;
                }
                        if (rc)
                                break;
                }
-       } else if (lsa->lsa_stripe_off != -1) {
+       } else if (lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT) {
                rc = llapi_layout_ost_index_set(layout, 0, lsa->lsa_stripe_off);
        }
        if (rc) {
@@@ -1189,19 -1674,14 +1680,19 @@@ static int adjust_first_extent(char *fn
        uint64_t start, end, stripe_size, prev_end = 0;
        int rc;
  
 -      if (layout == NULL)
 +      if (layout == NULL) {
 +              fprintf(stderr,
 +                      "%s setstripe: layout must be specified\n",
 +                      progname);
                return -EINVAL;
 +      }
  
        errno = 0;
        head = llapi_layout_get_by_path(fname, 0);
        if (head == NULL) {
 -              fprintf(stderr, "Read layout from %s failed. %s\n",
 -                      fname, strerror(errno));
 +              fprintf(stderr,
 +                      "%s setstripe: cannot read layout from '%s': %s\n",
 +                      progname, fname, strerror(errno));
                return -EINVAL;
        } else if (errno == ENODATA) {
                /* file without LOVEA, this component-add will be turned
                llapi_layout_free(head);
                return -ENODATA;
        } else if (!llapi_layout_is_composite(head)) {
 -              fprintf(stderr, "'%s' isn't a composite file.\n",
 -                      fname);
 +              fprintf(stderr, "%s setstripe: '%s' not a composite file\n",
 +                      progname, fname);
                llapi_layout_free(head);
                return -EINVAL;
        }
  
        rc = llapi_layout_comp_extent_get(head, &start, &prev_end);
        if (rc) {
 -              fprintf(stderr, "Get prev extent failed. %s\n",
 -                      strerror(errno));
 +              fprintf(stderr, "%s setstripe: cannot get prev extent: %s\n",
 +                      progname, strerror(errno));
                llapi_layout_free(head);
                return rc;
        }
        /* Make sure we use the first component of the layout to be added. */
        rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
        if (rc < 0) {
 -              fprintf(stderr, "Move component cursor failed. %s\n",
 -                      strerror(errno));
 +              fprintf(stderr,
 +                      "%s setstripe: cannot move component cursor: %s\n",
 +                      progname, strerror(errno));
                return rc;
        }
  
        rc = llapi_layout_comp_extent_get(layout, &start, &end);
        if (rc) {
 -              fprintf(stderr, "Get extent failed. %s\n", strerror(errno));
 +              fprintf(stderr, "%s setstripe: cannot get extent: %s\n",
 +                      progname, strerror(errno));
                return rc;
        }
  
        if (start > prev_end || end <= prev_end) {
 -              fprintf(stderr, "First extent to be set [%lu, %lu) isn't "
 -                      "adjacent with the existing file extent end: %lu\n",
 -                      start, end, prev_end);
 +              fprintf(stderr,
 +                      "%s setstripe: first extent [%lu, %lu) not adjacent with extent end %lu\n",
 +                      progname, start, end, prev_end);
                return -EINVAL;
        }
  
        rc = llapi_layout_stripe_size_get(layout, &stripe_size);
        if (rc) {
 -              fprintf(stderr, "Get stripe size failed. %s\n",
 -                      strerror(errno));
 +              fprintf(stderr, "%s setstripe: cannot get stripe size: %s\n",
 +                      progname, strerror(errno));
                return rc;
        }
  
        if (stripe_size != LLAPI_LAYOUT_DEFAULT &&
            (prev_end & (stripe_size - 1))) {
 -              fprintf(stderr, "Stripe size %lu not aligned with %lu\n",
 -                      stripe_size, prev_end);
 +              fprintf(stderr,
 +                      "%s setstripe: stripe size %lu not aligned with %lu\n",
 +                      progname, stripe_size, prev_end);
                return -EINVAL;
        }
  
        rc = llapi_layout_comp_extent_set(layout, prev_end, end);
        if (rc) {
 -              fprintf(stderr, "Set component extent [%lu, %lu) failed. %s\n",
 -                      prev_end, end, strerror(errno));
 +              fprintf(stderr,
 +                      "%s setstripe: cannot set component extent [%lu, %lu): %s\n",
 +                      progname, prev_end, end, strerror(errno));
                return rc;
        }
  
@@@ -1344,6 -1820,63 +1835,63 @@@ static inline bool arg_is_eof(char *arg
               !strncmp(arg, "eof", strlen("eof"));
  }
  
+ /**
+  * lfs_mirror_alloc() - Allocate a zero-filled mirror argument structure.
+  *
+  * The allocation is retried once per second until calloc() succeeds, so
+  * this function blocks while memory is unavailable instead of failing.
+  *
+  * Return: Valid mirror_args pointer; never NULL.
+  */
+ static struct mirror_args *lfs_mirror_alloc(void)
+ {
+       struct mirror_args *mirror = calloc(1, sizeof(*mirror));
+
+       /* keep trying: callers use the result without a NULL check */
+       while (mirror == NULL) {
+               sleep(1);
+               mirror = calloc(1, sizeof(*mirror));
+       }
+       return mirror;
+ }
+ /**
+  * lfs_mirror_free() - Release one mirror argument structure.
+  * @mirror: structure obtained from lfs_mirror_alloc(); it is dereferenced
+  *        unconditionally, so it must not be NULL.
+  *
+  * Frees the layout attached to @mirror (when one is set) through
+  * llapi_layout_free() and then frees @mirror itself.
+  *
+  * Return: void.
+  */
+ static void lfs_mirror_free(struct mirror_args *mirror)
+ {
+       struct llapi_layout *attached = mirror->m_layout;
+
+       if (attached != NULL)
+               llapi_layout_free(attached);
+       free(mirror);
+ }
+ /**
+  * lfs_mirror_list_free() - Release every element of a mirror list.
+  * @mirror_list: head of a list linked through m_next; may be NULL, in
+  *             which case nothing is done.
+  *
+  * Walks the list and hands each element to lfs_mirror_free(). The link to
+  * the following element is read before the current one is released.
+  *
+  * Return: void.
+  */
+ static void lfs_mirror_list_free(struct mirror_args *mirror_list)
+ {
+       struct mirror_args *cur;
+       struct mirror_args *following;
+
+       for (cur = mirror_list; cur != NULL; cur = following) {
+               following = cur->m_next;
+               lfs_mirror_free(cur);
+       }
+ }
  enum {
        LFS_POOL_OPT = 3,
        LFS_COMP_COUNT_OPT,
        LFS_COMP_DEL_OPT,
        LFS_COMP_SET_OPT,
        LFS_COMP_ADD_OPT,
+       LFS_COMP_USE_PARENT_OPT,
+       LFS_COMP_NO_VERIFY_OPT,
        LFS_PROJID_OPT,
  };
  
  /* functions */
- static int lfs_setstripe(int argc, char **argv)
+ static int lfs_setstripe0(int argc, char **argv, enum setstripe_origin opc)
  {
        struct lfs_setstripe_args        lsa;
        struct llapi_stripe_param       *param = NULL;
        int                              comp_add = 0;
        __u32                            comp_id = 0;
        struct llapi_layout             *layout = NULL;
+       struct llapi_layout             **lpp = &layout;
+       bool                             mirror_mode = false;
+       bool                             has_m_file = false;
+       __u32                            mirror_count = 0;
+       enum mirror_flags                mirror_flags = 0;
+       struct mirror_args              *mirror_list = NULL;
+       struct mirror_args              *new_mirror = NULL;
+       struct mirror_args              *last_mirror = NULL;
+       char                             cmd[PATH_MAX];
  
        struct option long_opts[] = {
                /* --block is only valid in migrate mode */
        { .val = LFS_COMP_SET_OPT,
                        .name = "component-set",
                                                .has_arg = no_argument},
+       { .val = LFS_COMP_USE_PARENT_OPT,
+                       .name = "parent",       .has_arg = no_argument},
+       { .val = LFS_COMP_NO_VERIFY_OPT,
+                       .name = "no-verify",    .has_arg = no_argument},
        { .val = 'c',   .name = "stripe-count", .has_arg = required_argument},
        { .val = 'c',   .name = "stripe_count", .has_arg = required_argument},
        { .val = 'd',   .name = "delete",       .has_arg = no_argument},
        { .val = 'E',   .name = "comp-end",     .has_arg = required_argument},
        { .val = 'E',   .name = "component-end",
                                                .has_arg = required_argument},
+       { .val = 'f',   .name = "file",         .has_arg = required_argument },
        /* dirstripe {"mdt-hash",     required_argument, 0, 'H'}, */
        { .val = 'i',   .name = "stripe-index", .has_arg = required_argument},
        { .val = 'i',   .name = "stripe_index", .has_arg = required_argument},
        { .val = 'm',   .name = "mdt",          .has_arg = required_argument},
        { .val = 'm',   .name = "mdt-index",    .has_arg = required_argument},
        { .val = 'm',   .name = "mdt_index",    .has_arg = required_argument},
+       { .val = 'N',   .name = "mirror-count", .has_arg = optional_argument},
        /* --non-block is only valid in migrate mode */
        { .val = 'n',   .name = "non-block",    .has_arg = no_argument},
        { .val = 'o',   .name = "ost",          .has_arg = required_argument},
        /* dirstripe {"mdt-count",    required_argument, 0, 'T'}, */
        /* --verbose is only valid in migrate mode */
        { .val = 'v',   .name = "verbose",      .has_arg = no_argument },
-       { .val = LFS_COMP_ADD_OPT,
-                       .name = "component-add",
-                                               .has_arg = no_argument },
-       { .val = LFS_COMP_DEL_OPT,
-                       .name = "component-del",
-                                               .has_arg = no_argument },
-       { .val = LFS_COMP_FLAGS_OPT,
-                       .name = "component-flags",
-                                               .has_arg = required_argument },
-       { .val = LFS_COMP_SET_OPT,
-                       .name = "component-set",
-                                               .has_arg = no_argument },
        { .name = NULL } };
  
        setstripe_args_init(&lsa);
  
-       if (strcmp(argv[0], "migrate") == 0)
-               migrate_mode = true;
+       migrate_mode = (opc == SO_MIGRATE);
+       mirror_mode = (opc == SO_MIRROR_CREATE || opc == SO_MIRROR_EXTEND);
  
-       while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:L:s:S:v",
+       snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]);
+       progname = cmd;
+       while ((c = getopt_long(argc, argv, "bc:dE:f:i:I:m:N::no:p:L:s:S:v",
                                long_opts, NULL)) >= 0) {
                switch (c) {
                case 0:
                case LFS_COMP_SET_OPT:
                        comp_set = 1;
                        break;
+               case LFS_COMP_USE_PARENT_OPT:
+                       if (!mirror_mode) {
+                               fprintf(stderr, "error: %s: --parent must be "
+                                       "specified with --mirror-count|-N "
+                                       "option\n", progname);
+                               goto usage_error;
+                       }
+                       setstripe_args_init(&lsa);
+                       break;
+               case LFS_COMP_NO_VERIFY_OPT:
+                       mirror_flags |= NO_VERIFY;
+                       break;
                case 'b':
                        if (!migrate_mode) {
                                fprintf(stderr,
                                        progname, argv[0], optarg);
                                goto usage_error;
                        }
+                       if (lsa.lsa_stripe_count == -1)
+                               lsa.lsa_stripe_count = LLAPI_LAYOUT_WIDE;
                        break;
                case 'd':
                        /* delete the default striping pattern */
                        break;
                case 'E':
                        if (lsa.lsa_comp_end != 0) {
-                               result = comp_args_to_layout(&layout, &lsa);
+                               result = comp_args_to_layout(lpp, &lsa);
                                if (result) {
                                        fprintf(stderr,
                                                "%s %s: invalid layout\n",
                                        goto usage_error;
                                }
  
-                               setstripe_args_init(&lsa);
+                               setstripe_args_init_inherit(&lsa);
                        }
  
                        if (arg_is_eof(optarg)) {
                                        progname, argv[0], optarg);
                                goto usage_error;
                        }
+                       if (lsa.lsa_stripe_off == -1)
+                               lsa.lsa_stripe_off = LLAPI_LAYOUT_DEFAULT;
                        break;
                case 'I':
                        comp_id = strtoul(optarg, &end, 0);
                                goto usage_error;
                        }
                        break;
+               case 'f':
+                       if (opc != SO_MIRROR_EXTEND) {
+                               fprintf(stderr,
+                                       "error: %s: invalid option: %s\n",
+                                       progname, argv[optopt + 1]);
+                               goto usage_error;
+                       }
+                       if (last_mirror == NULL) {
+                               fprintf(stderr, "error: %s: '-N' must exist "
+                                       "in front of '%s'\n",
+                                       progname, argv[optopt + 1]);
+                               goto usage_error;
+                       }
+                       last_mirror->m_file = optarg;
+                       last_mirror->m_count = 1;
+                       has_m_file = true;
+                       break;
                case 'L':
                        if (strcmp(argv[optind - 1], "mdt") == 0) {
                                /* Can be only the first component */
                        }
                        migration_flags |= MIGRATION_NONBLOCK;
                        break;
+               case 'N':
+                       if (opc == SO_SETSTRIPE) {
+                               opc = SO_MIRROR_CREATE;
+                               mirror_mode = true;
+                       }
+                       mirror_count = 1;
+                       if (optarg != NULL) {
+                               mirror_count = strtoul(optarg, &end, 0);
+                               if (*end != '\0' || mirror_count == 0) {
+                                       fprintf(stderr,
+                                               "error: %s: bad mirror count: %s\n",
+                                               progname, optarg);
+                                       result = -EINVAL;
+                                       goto error;
+                               }
+                       }
+                       new_mirror = lfs_mirror_alloc();
+                       new_mirror->m_count = mirror_count;
+                       if (mirror_list == NULL)
+                               mirror_list = new_mirror;
+                       if (last_mirror != NULL) {
+                               /* wrap up last mirror */
+                               if (lsa.lsa_comp_end == 0)
+                                       lsa.lsa_comp_end = LUSTRE_EOF;
+                               result = comp_args_to_layout(lpp, &lsa);
+                               if (result) {
+                                       lfs_mirror_free(new_mirror);
+                                       goto error;
+                               }
+                               setstripe_args_init_inherit(&lsa);
+                               last_mirror->m_next = new_mirror;
+                       }
+                       last_mirror = new_mirror;
+                       lpp = &last_mirror->m_layout;
+                       break;
                case 'o':
                        lsa.lsa_nr_osts = parse_targets(osts,
                                                sizeof(osts) / sizeof(__u32),
                        }
  
                        lsa.lsa_osts = osts;
-                       if (lsa.lsa_stripe_off == -1)
+                       if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
                                lsa.lsa_stripe_off = osts[0];
                        break;
                case 'p':
  
        fname = argv[optind];
  
-       if (lsa.lsa_comp_end != 0) {
-               result = comp_args_to_layout(&layout, &lsa);
-               if (result) {
-                       fprintf(stderr, "%s %s: invalid component layout\n",
-                               progname, argv[0]);
-                       goto usage_error;
-               }
-       }
        if (optind == argc) {
                fprintf(stderr, "%s %s: FILE must be specified\n",
                        progname, argv[0]);
                goto usage_error;
        }
  
+       if (mirror_mode && mirror_count == 0) {
+               fprintf(stderr,
+                       "error: %s: --mirror-count|-N option is required\n",
+                       progname);
+               result = -EINVAL;
+               goto error;
+       }
+       if (mirror_mode) {
+               if (lsa.lsa_comp_end == 0)
+                       lsa.lsa_comp_end = LUSTRE_EOF;
+       }
+       if (lsa.lsa_comp_end != 0) {
+               result = comp_args_to_layout(lpp, &lsa);
+               if (result)
+                       goto error;
+       }
+       if (mirror_flags & NO_VERIFY) {
+               if (opc != SO_MIRROR_EXTEND) {
+                       fprintf(stderr,
+                               "error: %s: --no-verify is valid only for lfs mirror extend command\n",
+                               progname);
+                       result = -EINVAL;
+                       goto error;
+               } else if (!has_m_file) {
+                       fprintf(stderr,
+                               "error: %s: --no-verify must be specified with -f <victim_file> option\n",
+                               progname);
+                       result = -EINVAL;
+                       goto error;
+               }
+       }
        /* Only LCME_FL_INIT flags is used in PFL, and it shouldn't be
         * altered by user space tool, so we don't need to support the
         * --component-set for this moment. */
                                progname);
                        goto usage_error;
                }
+               if (mirror_mode) {
+                       fprintf(stderr, "error: %s: can't use --component-add "
+                               "or --component-del for mirror operation\n",
+                               progname);
+                       goto usage_error;
+               }
        }
  
        if (comp_add) {
                                progname, argv[0]);
                        goto usage_error;
                }
                result = adjust_first_extent(fname, layout);
                if (result == -ENODATA)
                        comp_add = 0;
                        goto error;
                }
  
-               param->lsp_stripe_size = lsa.lsa_stripe_size;
-               param->lsp_stripe_offset = lsa.lsa_stripe_off;
-               param->lsp_stripe_count = lsa.lsa_stripe_count;
+               if (lsa.lsa_stripe_size != LLAPI_LAYOUT_DEFAULT)
+                       param->lsp_stripe_size = lsa.lsa_stripe_size;
+               if (lsa.lsa_stripe_count != LLAPI_LAYOUT_DEFAULT) {
+                       if (lsa.lsa_stripe_count == LLAPI_LAYOUT_WIDE)
+                               param->lsp_stripe_count = -1;
+                       else
+                               param->lsp_stripe_count = lsa.lsa_stripe_count;
+               }
+               if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
+                       param->lsp_stripe_offset = -1;
+               else
+                       param->lsp_stripe_offset = lsa.lsa_stripe_off;
                param->lsp_pool = lsa.lsa_pool_name;
                param->lsp_is_specific = false;
                if (lsa.lsa_nr_osts > 0) {
                        if (lsa.lsa_stripe_count > 0 &&
+                           lsa.lsa_stripe_count != LLAPI_LAYOUT_DEFAULT &&
+                           lsa.lsa_stripe_count != LLAPI_LAYOUT_WIDE &&
                            lsa.lsa_nr_osts != lsa.lsa_stripe_count) {
-                               fprintf(stderr,
-                                       "%s %s: stripe count '%d' does not match number of OSTs: %d\n",
-                                       progname, argv[0], lsa.lsa_stripe_count,
+                               fprintf(stderr, "error: %s: stripe count %lld "
+                                       "doesn't match the number of OSTs: %d\n"
+                                       , argv[0], lsa.lsa_stripe_count,
                                        lsa.lsa_nr_osts);
                                free(param);
                                goto usage_error;
                                                   lsa.lsa_comp_flags);
                } else if (comp_add != 0) {
                        result = lfs_component_add(fname, layout);
+               } else if (opc == SO_MIRROR_CREATE) {
+                       result = mirror_create(fname, mirror_list);
+               } else if (opc == SO_MIRROR_EXTEND) {
+                       result = mirror_extend(fname, mirror_list,
+                                              mirror_flags);
                } else if (layout != NULL) {
                        result = lfs_component_create(fname, O_CREAT | O_WRONLY,
                                                      0644, layout);
  
        free(param);
        llapi_layout_free(layout);
+       lfs_mirror_list_free(mirror_list);
        return result2;
  usage_error:
        result = CMD_HELP;
  error:
        llapi_layout_free(layout);
+       lfs_mirror_list_free(mirror_list);
        return result;
  }
  
        return rc ? : rc1;
  }
  
 +static int get_print_quota(char *mnt, char *name, struct if_quotactl *qctl,
 +                         int verbose, int quiet, bool human_readable)
 +{
 +      int rc1 = 0, rc2 = 0, rc3 = 0;
 +      char *obd_type = (char *)qctl->obd_type;
 +      char *obd_uuid = (char *)qctl->obd_uuid.uuid;
 +      __u64 total_ialloc = 0, total_balloc = 0;
 +      int inacc;
 +
 +      rc1 = llapi_quotactl(mnt, qctl);
 +      if (rc1 < 0) {
 +              switch (rc1) {
 +              case -ESRCH:
 +                      fprintf(stderr, "%s quotas are not enabled.\n",
 +                              qtype_name(qctl->qc_type));
 +                      goto out;
 +              case -EPERM:
 +                      fprintf(stderr, "Permission denied.\n");
 +              case -ENODEV:
 +              case -ENOENT:
 +                      /* We already got error message. */
 +                      goto out;
 +              default:
 +                      fprintf(stderr, "Unexpected quotactl error: %s\n",
 +                              strerror(-rc1));
 +              }
 +      }
 +
 +      if (qctl->qc_cmd == LUSTRE_Q_GETQUOTA && !quiet)
 +              print_quota_title(name, qctl, human_readable);
 +
 +      if (rc1 && *obd_type)
 +              fprintf(stderr, "%s %s ", obd_type, obd_uuid);
 +
 +      if (qctl->qc_valid != QC_GENERAL)
 +              mnt = "";
 +
 +      inacc = (qctl->qc_cmd == LUSTRE_Q_GETQUOTA) &&
 +              ((qctl->qc_dqblk.dqb_valid & (QIF_LIMITS|QIF_USAGE)) !=
 +               (QIF_LIMITS|QIF_USAGE));
 +
 +      print_quota(mnt, qctl, QC_GENERAL, rc1, human_readable);
 +
 +      if (qctl->qc_valid == QC_GENERAL && qctl->qc_cmd != LUSTRE_Q_GETINFO &&
 +          verbose) {
 +              char strbuf[STRBUF_LEN];
 +
 +              rc2 = print_obd_quota(mnt, qctl, 1, human_readable,
 +                                    &total_ialloc);
 +              rc3 = print_obd_quota(mnt, qctl, 0, human_readable,
 +                                    &total_balloc);
 +              kbytes2str(total_balloc, strbuf, sizeof(strbuf),
 +                         human_readable);
 +              printf("Total allocated inode limit: %ju, total "
 +                     "allocated block limit: %s\n", (uintmax_t)total_ialloc,
 +                     strbuf);
 +      }
 +
 +      if (rc1 || rc2 || rc3 || inacc)
 +              printf("Some errors happened when getting quota info. "
 +                     "Some devices may be not working or deactivated. "
 +                     "The data in \"[]\" is inaccurate.\n");
 +out:
 +      return rc1;
 +
 +}
 +
  static int lfs_quota(int argc, char **argv)
  {
        int c;
        char *mnt, *name = NULL;
        struct if_quotactl qctl = { .qc_cmd = LUSTRE_Q_GETQUOTA,
                                    .qc_type = ALLQUOTA };
 -      char *obd_type = (char *)qctl.obd_type;
        char *obd_uuid = (char *)qctl.obd_uuid.uuid;
 -      int rc = 0, rc1 = 0, rc2 = 0, rc3 = 0,
 -          verbose = 0, pass = 0, quiet = 0, inacc;
 +      int rc = 0, rc1 = 0, verbose = 0, quiet = 0;
        char *endptr;
        __u32 valid = QC_GENERAL, idx = 0;
 -      __u64 total_ialloc = 0, total_balloc = 0;
        bool human_readable = false;
        int qtype;
  
@@@ -4293,28 -4898,29 +4977,28 @@@ quota_type
          /* current uid/gid info for "lfs quota /path/to/lustre/mount" */
        if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && qctl.qc_type == ALLQUOTA &&
            optind == argc - 1) {
 -all_output:
 -              memset(&qctl, 0, sizeof(qctl)); /* spoiled by print_*_quota */
 +
                qctl.qc_cmd = LUSTRE_Q_GETQUOTA;
                qctl.qc_valid = valid;
                qctl.qc_idx = idx;
 -              qctl.qc_type = pass;
 -              switch (qctl.qc_type) {
 -              case USRQUOTA:
 -                      qctl.qc_id = geteuid();
 -                      rc = uid2name(&name, qctl.qc_id);
 -                      break;
 -              case GRPQUOTA:
 -                      qctl.qc_id = getegid();
 -                      rc = gid2name(&name, qctl.qc_id);
 -                      break;
 -              default:
 -                      rc = -ENOTSUP;
 -                      pass++;
 -                      goto out;
 +
 +              for (qtype = USRQUOTA; qtype <= GRPQUOTA; qtype++) {
 +                      qctl.qc_type = qtype;
 +                      if (qtype == USRQUOTA) {
 +                              qctl.qc_id = geteuid();
 +                              rc = uid2name(&name, qctl.qc_id);
 +                      } else {
 +                              qctl.qc_id = getegid();
 +                              rc = gid2name(&name, qctl.qc_id);
 +                      }
 +                      if (rc)
 +                              name = "<unknown>";
 +                      mnt = argv[optind];
 +                      rc1 = get_print_quota(mnt, name, &qctl, verbose, quiet,
 +                                            human_readable);
 +                      if (rc1 && !rc)
 +                              rc = rc1;
                }
 -              if (rc)
 -                      name = "<unknown>";
 -              pass++;
        /* lfs quota -u username /path/to/lustre/mount */
        } else if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) {
                /* options should be followed by u/g-name and mntpoint */
                                return CMD_HELP;
                        }
                }
 +              mnt = argv[optind];
 +              rc = get_print_quota(mnt, name, &qctl, verbose, quiet,
 +                                   human_readable);
        } else if (optind + 1 != argc || qctl.qc_type == ALLQUOTA) {
                fprintf(stderr, "error: missing quota info argument(s)\n");
                return CMD_HELP;
        }
  
 -      mnt = argv[optind];
 -      rc1 = llapi_quotactl(mnt, &qctl);
 -      if (rc1 < 0) {
 -              switch (rc1) {
 -              case -ESRCH:
 -                      fprintf(stderr, "%s quotas are not enabled.\n",
 -                              qtype_name(qctl.qc_type));
 -                      goto out;
 -              case -EPERM:
 -                      fprintf(stderr, "Permission denied.\n");
 -              case -ENODEV:
 -              case -ENOENT:
 -                      /* We already got error message. */
 -                      goto out;
 -              default:
 -                      fprintf(stderr, "Unexpected quotactl error: %s\n",
 -                              strerror(-rc1));
 -              }
 -      }
 -
 -      if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && !quiet)
 -              print_quota_title(name, &qctl, human_readable);
 -
 -        if (rc1 && *obd_type)
 -                fprintf(stderr, "%s %s ", obd_type, obd_uuid);
 -
 -        if (qctl.qc_valid != QC_GENERAL)
 -                mnt = "";
 -
 -      inacc = (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) &&
 -              ((qctl.qc_dqblk.dqb_valid & (QIF_LIMITS|QIF_USAGE)) !=
 -               (QIF_LIMITS|QIF_USAGE));
 -
 -      print_quota(mnt, &qctl, QC_GENERAL, rc1, human_readable);
 -
 -      if (qctl.qc_valid == QC_GENERAL && qctl.qc_cmd != LUSTRE_Q_GETINFO &&
 -          verbose) {
 -              char strbuf[STRBUF_LEN];
 -
 -              rc2 = print_obd_quota(mnt, &qctl, 1, human_readable,
 -                                    &total_ialloc);
 -              rc3 = print_obd_quota(mnt, &qctl, 0, human_readable,
 -                                    &total_balloc);
 -              kbytes2str(total_balloc, strbuf, sizeof(strbuf),
 -                         human_readable);
 -              printf("Total allocated inode limit: %ju, total "
 -                     "allocated block limit: %s\n", (uintmax_t)total_ialloc,
 -                     strbuf);
 -      }
 -
 -      if (rc1 || rc2 || rc3 || inacc)
 -              printf("Some errors happened when getting quota info. "
 -                     "Some devices may be not working or deactivated. "
 -                     "The data in \"[]\" is inaccurate.\n");
 -
 -out:
 -      if (pass > 0 && pass < LL_MAXQUOTAS)
 -              goto all_output;
 -
 -      return rc1;
 +      return rc;
  }
  #endif /* HAVE_SYS_QUOTA_H! */
  
        return rc;
  }
  
+ /**
+  * parse_mirror_ids() - Parse a comma delimited list of ids and id ranges.
+  * @ids:  output array receiving the distinct ids in order of appearance
+  * @size: number of elements available in @ids
+  * @arg:  input string, for example "1,2-4,7"; it is read but never
+  *        modified
+  *
+  * Every list element is either a single number or an inclusive range
+  * "START-END". Numbers are converted by strtol() with base 0. An id that
+  * appears more than once is stored only once.
+  *
+  * Return: number of ids stored in @ids on success; -EINVAL when @arg is
+  *         NULL or malformed, when a value lies outside 0..65535 (what a
+  *         __u16 element can represent), when a range end is smaller than
+  *         its start, or when the list names more distinct ids than @ids
+  *         can hold.
+  */
+ static int parse_mirror_ids(__u16 *ids, int size, char *arg)
+ {
+       const long id_max = 0xffff;
+       int nr = 0;
+
+       if (arg == NULL)
+               return -EINVAL;
+
+       for (;;) {
+               char *endptr = NULL;
+               long start_index;
+               long end_index;
+               long i;
+
+               start_index = strtol(arg, &endptr, 0);
+               if (endptr == arg) /* no data at all */
+                       return -EINVAL;
+               /* also rejects the LONG_MIN/LONG_MAX overflow results */
+               if (start_index < 0 || start_index > id_max)
+                       return -EINVAL;
+
+               end_index = start_index;
+               if (*endptr == '-') {
+                       char *range_end = endptr + 1;
+
+                       end_index = strtol(range_end, &endptr, 0);
+                       if (endptr == range_end) /* "N-" without an end */
+                               return -EINVAL;
+                       if (end_index < start_index || end_index > id_max)
+                               return -EINVAL;
+               }
+
+               /* an element must be followed by ',' or the end of string */
+               if (*endptr != ',' && *endptr != '\0')
+                       return -EINVAL;
+
+               for (i = start_index; i <= end_index; i++) {
+                       int j;
+
+                       /* remove duplicate */
+                       for (j = 0; j < nr; j++) {
+                               if (ids[j] == i)
+                                       break;
+                       }
+                       if (j < nr)
+                               continue;
+
+                       /* a new id that does not fit is an error, never
+                        * a silent truncation of the list
+                        */
+                       if (nr >= size)
+                               return -EINVAL;
+                       ids[nr++] = i;
+               }
+
+               if (*endptr == '\0')
+                       break;
+               arg = endptr + 1;
+       }
+
+       return nr;
+ }
+ static inline
+ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
+                          __u16 *mirror_ids, int ids_nr)
+ {
+       const char *progname = "lfs mirror resync";
+       struct llapi_resync_comp comp_array[1024] = { { 0 } };
+       struct llapi_layout *layout;
+       struct stat stbuf;
+       uint32_t flr_state;
+       int comp_size = 0;
+       int idx;
+       int fd;
+       int rc;
+       if (stat(fname, &stbuf) < 0) {
+               fprintf(stderr, "%s: cannot stat file '%s': %s.\n",
+                       progname, fname, strerror(errno));
+               rc = -errno;
+               goto error;
+       }
+       if (!S_ISREG(stbuf.st_mode)) {
+               fprintf(stderr, "%s: '%s' is not a regular file.\n",
+                       progname, fname);
+               rc = -EINVAL;
+               goto error;
+       }
+       fd = open(fname, O_DIRECT | O_RDWR);
+       if (fd < 0) {
+               fprintf(stderr, "%s: cannot open '%s': %s.\n",
+                       progname, fname, strerror(errno));
+               rc = -errno;
+               goto error;
+       }
+       ioc->lil_mode = LL_LEASE_WRLCK;
+       ioc->lil_flags = LL_LEASE_RESYNC;
+       rc = llapi_lease_get_ext(fd, ioc);
+       if (rc < 0) {
+               fprintf(stderr, "%s: '%s' llapi_lease_get_ext resync failed: "
+                       "%s.\n", progname, fname, strerror(errno));
+               goto close_fd;
+       }
+       layout = llapi_layout_get_by_fd(fd, 0);
+       if (layout == NULL) {
+               fprintf(stderr, "%s: '%s' llapi_layout_get_by_fd failed: %s.\n",
+                       progname, fname, strerror(errno));
+               rc = -errno;
+               goto close_fd;
+       }
+       rc = llapi_layout_flags_get(layout, &flr_state);
+       if (rc) {
+               fprintf(stderr, "%s: '%s' llapi_layout_flags_get failed: %s.\n",
+                       progname, fname, strerror(errno));
+               rc = -errno;
+               goto close_fd;
+       }
+       flr_state &= LCM_FL_FLR_MASK;
+       switch (flr_state) {
+       case LCM_FL_NOT_FLR:
+               rc = -EINVAL;
+       case LCM_FL_RDONLY:
+               fprintf(stderr, "%s: '%s' file state error: %s.\n",
+                       progname, fname, lcm_flags_string(flr_state));
+               goto close_fd;
+       default:
+               break;
+       }
+       /* get stale component info */
+       comp_size = llapi_mirror_find_stale(layout, comp_array,
+                                           ARRAY_SIZE(comp_array),
+                                           mirror_ids, ids_nr);
+       if (comp_size < 0) {
+               rc = comp_size;
+               goto close_fd;
+       }
+       idx = 0;
+       while (idx < comp_size) {
+               ssize_t result;
+               uint64_t end;
+               __u16 mirror_id;
+               int i;
+               rc = llapi_lease_check(fd);
+               if (rc != LL_LEASE_WRLCK) {
+                       fprintf(stderr, "%s: '%s' lost lease lock.\n",
+                               progname, fname);
+                       goto close_fd;
+               }
+               mirror_id = comp_array[idx].lrc_mirror_id;
+               end = comp_array[idx].lrc_end;
+               /* try to combine adjacent component */
+               for (i = idx + 1; i < comp_size; i++) {
+                       if (mirror_id != comp_array[i].lrc_mirror_id ||
+                           end != comp_array[i].lrc_start)
+                               break;
+                       end = comp_array[i].lrc_end;
+               }
+               result = llapi_mirror_resync_one(fd, layout, mirror_id,
+                                                comp_array[idx].lrc_start,
+                                                end);
+               if (result < 0) {
+                       fprintf(stderr, "%s: '%s' llapi_mirror_resync_one: "
+                               "%ld.\n", progname, fname, result);
+                       rc = result;
+                       goto close_fd;
+               } else if (result > 0) {
+                       int j;
+                       /* mark synced components */
+                       for (j = idx; j < i; j++)
+                               comp_array[j].lrc_synced = true;
+               }
+               idx = i;
+       }
+       /* prepare ioc for lease put */
+       ioc->lil_mode = LL_LEASE_UNLCK;
+       ioc->lil_flags = LL_LEASE_RESYNC_DONE;
+       ioc->lil_count = 0;
+       for (idx = 0; idx < comp_size; idx++) {
+               if (comp_array[idx].lrc_synced) {
+                       ioc->lil_ids[ioc->lil_count] = comp_array[idx].lrc_id;
+                       ioc->lil_count++;
+               }
+       }
+       llapi_layout_free(layout);
+       rc = llapi_lease_get_ext(fd, ioc);
+       if (rc <= 0) {
+               if (rc == 0) /* lost lease lock */
+                       rc = -EBUSY;
+               fprintf(stderr, "%s: resync file '%s' failed: %s.\n",
+                       progname, fname, strerror(errno));
+               goto close_fd;
+       }
+       /**
+        * llapi_lease_get_ext() returns the lease mode when it is asked to
+        * unlock the lease lock, so the positive value is normalized to 0.
+        */
+       rc = 0;
+ close_fd:
+       close(fd);
+ error:
+       return rc;
+ }
+ /**
+  * lfs_mirror_resync() - Resynchronize stale mirror components of files.
+  * @argc: The count of command line arguments.
+  * @argv: Array of strings for command line arguments.
+  *
+  * Accepts "-o/--only <mirror ids>" to restrict the resync to the given
+  * mirrors; that option is rejected when more than one file is named.
+  * Each file is handed to lfs_mirror_resync_file() in turn and a failure
+  * on one file does not stop processing of the remaining files.
+  *
+  * Return: the result of the last file processed, CMD_HELP on a usage
+  * error, or a negative value on option parsing or allocation failure.
+  */
+ static inline int lfs_mirror_resync(int argc, char **argv)
+ {
+       struct ll_ioc_lease *ioc = NULL;
+       /* lease ioctl header followed by room for 4096 __u32 ids */
+       const size_t ioc_size = sizeof(*ioc) + sizeof(__u32) * 4096;
+       __u16 mirror_ids[128] = { 0 };
+       int ids_nr = 0;
+       int c;
+       int rc = 0;
+       struct option long_opts[] = {
+       { .val = 'o',   .name = "only",         .has_arg = required_argument },
+       { .name = NULL } };
+       while ((c = getopt_long(argc, argv, "o:", long_opts, NULL)) >= 0) {
+               switch (c) {
+               case 'o':
+                       rc = parse_mirror_ids(mirror_ids,
+                                             ARRAY_SIZE(mirror_ids),
+                                             optarg);
+                       if (rc < 0) {
+                               fprintf(stderr,
+                                       "%s: bad mirror ids '%s'.\n",
+                                       argv[0], optarg);
+                               goto error;
+                       }
+                       /* a non-negative result is the number of ids parsed */
+                       ids_nr = rc;
+                       break;
+               default:
+                       fprintf(stderr, "%s: options '%s' unrecognized.\n",
+                               argv[0], argv[optind - 1]);
+                       rc = -EINVAL;
+                       goto error;
+               }
+       }
+       if (argc == optind) {
+               fprintf(stderr, "%s: no file name given.\n", argv[0]);
+               rc = CMD_HELP;
+               goto error;
+       }
+       if (ids_nr > 0 && argc > optind + 1) {
+               fprintf(stderr, "%s: option '--only' cannot be used upon "
+                       "multiple files.\n", argv[0]);
+               rc = CMD_HELP;
+               goto error;
+       }
+       /* set the lease on the file */
+       ioc = calloc(1, ioc_size);
+       if (ioc == NULL) {
+               /* capture errno first: fprintf() is allowed to change it */
+               rc = -errno;
+               fprintf(stderr, "%s: cannot alloc id array for ioc: %s.\n",
+                       argv[0], strerror(-rc));
+               goto error;
+       }
+       for (; optind < argc; optind++) {
+               rc = lfs_mirror_resync_file(argv[optind], ioc,
+                                           mirror_ids, ids_nr);
+               if (rc)
+                       fprintf(stderr, "%s: resync file '%s' failed: %d\n",
+                               argv[0], argv[optind], rc);
+               /* ignore previous file's error, continue with next file */
+               /* reset the whole ioc buffer, header and id array alike */
+               memset(ioc, 0, ioc_size);
+       }
+       free(ioc);
+ error:
+       return rc;
+ }
+ /**
+  * lfs_mirror() - Parse and execute lfs mirror commands.
+  * @argc: The count of lfs mirror command line arguments.
+  * @argv: Array of strings for lfs mirror command line arguments.
+  *
+  * This function parses lfs mirror commands and performs the
+  * corresponding functions specified in mirror_cmdlist[]. With no
+  * sub-command it runs the interactive parser instead. While the
+  * sub-command runs, progname and program_invocation_short_name are set
+  * to "<progname> <argv[0]>"; both are restored before returning because
+  * that string lives in a buffer local to this function.
+  *
+  * Return: 0 on success or an error code on failure.
+  */
+ static int lfs_mirror(int argc, char **argv)
+ {
+       char cmd[PATH_MAX];
+       const char *saved_progname = progname;
+       char *saved_short_name = program_invocation_short_name;
+       int rc = 0;
+       setlinebuf(stdout);
+       Parser_init("lfs-mirror > ", mirror_cmdlist);
+       snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]);
+       progname = cmd;
+       program_invocation_short_name = cmd;
+       if (argc > 1)
+               rc = Parser_execarg(argc - 1, argv + 1, mirror_cmdlist);
+       else
+               rc = Parser_commands();
+       /* cmd goes out of scope on return: do not leave pointers to it */
+       progname = saved_progname;
+       program_invocation_short_name = saved_short_name;
+       /* negative parser results are reported as positive codes */
+       return rc < 0 ? -rc : rc;
+ }
+ /**
+  * lfs_mirror_list_commands() - Print the lfs mirror sub-commands.
+  * @argc: The count of command line arguments (unused).
+  * @argv: Array of strings for command line arguments (unused).
+  *
+  * Hands mirror_cmdlist[] to Parser_list_commands() together with a
+  * scratch line buffer.
+  *
+  * Return: always 0.
+  */
+ static int lfs_mirror_list_commands(int argc, char **argv)
+ {
+       /* 80 printable chars + terminating NUL */
+       char line[81] = "";
+       Parser_list_commands(mirror_cmdlist, line, sizeof(line), NULL, 0, 4);
+       return 0;
+ }
  static int lfs_list_commands(int argc, char **argv)
  {
        char buffer[81] = ""; /* 80 printable chars + terminating NUL */
@@@ -2190,6 -2190,18 +2190,18 @@@ int sattr_cache_get_defaults(const cha
          return 0;
  }
  
+ /*
+  * Map a layout pattern value to its printable name: "mdt", "raid0" or
+  * "released" (RAID0 with the released flag set). Any other value yields
+  * "unknown". The returned strings are literals and must not be modified.
+  */
+ static char *layout2name(__u32 layout_pattern)
+ {
+       const __u32 released = LOV_PATTERN_RAID0 | LOV_PATTERN_F_RELEASED;
+       if (layout_pattern == LOV_PATTERN_MDT)
+               return "mdt";
+       if (layout_pattern == LOV_PATTERN_RAID0)
+               return "raid0";
+       if (layout_pattern == released)
+               return "released";
+       return "unknown";
+ }
  enum lov_dump_flags {
        LDF_IS_DIR      = 0x0001,
        LDF_IS_RAW      = 0x0002,
@@@ -2335,7 -2347,11 +2347,11 @@@ static void lov_dump_user_lmm_header(st
                if (verbose & ~VERBOSE_LAYOUT)
                        llapi_printf(LLAPI_MSG_NORMAL, "%s%spattern:       ",
                                     space, prefix);
-               llapi_printf(LLAPI_MSG_NORMAL, "%.x", lum->lmm_pattern);
+               if (lov_pattern_supported(lum->lmm_pattern))
+                       llapi_printf(LLAPI_MSG_NORMAL, "%s",
+                                    layout2name(lum->lmm_pattern));
+               else
+                       llapi_printf(LLAPI_MSG_NORMAL, "%.x", lum->lmm_pattern);
                separator = is_dir ? " " : "\n";
        }
  
@@@ -2459,8 -2475,8 +2475,8 @@@ void lov_dump_user_lmm_v1v3(struct lov_
                                             obdindex == idx ? " *" : "");
                        }
                }
-               llapi_printf(LLAPI_MSG_NORMAL, "\n");
        }
+       llapi_printf(LLAPI_MSG_NORMAL, "\n");
  }
  
  void lmv_dump_user_lmm(struct lmv_user_md *lum, char *pool_name,
@@@ -2591,24 -2607,40 +2607,40 @@@ static void lov_dump_comp_v1_header(str
  
        if (verbose & VERBOSE_DETAIL) {
                llapi_printf(LLAPI_MSG_NORMAL, "composite_header:\n");
-               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic:       0x%08X\n",
+               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic:         0x%08X\n",
                             " ", comp_v1->lcm_magic);
-               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size:        %u\n",
+               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size:          %u\n",
                             " ", comp_v1->lcm_size);
-               llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_flags:       %u\n",
-                            " ", comp_v1->lcm_flags);
+               if (flags & LDF_IS_DIR)
+                       llapi_printf(LLAPI_MSG_NORMAL,
+                                    "%2slcm_flags:         %s\n", " ",
+                                    comp_v1->lcm_mirror_count > 0 ?
+                                                       "mirrored" : "");
+               else
+                       llapi_printf(LLAPI_MSG_NORMAL,
+                                    "%2slcm_flags:         %s\n",
+                                    " ", lcm_flags_string(comp_v1->lcm_flags));
        }
  
        if (verbose & VERBOSE_GENERATION) {
                if (verbose & ~VERBOSE_GENERATION)
-                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen:  ",
+                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen:    ",
                                     " ");
                llapi_printf(LLAPI_MSG_NORMAL, "%u\n", comp_v1->lcm_layout_gen);
        }
  
+       if (verbose & VERBOSE_MIRROR_COUNT) {
+               if (verbose & ~VERBOSE_MIRROR_COUNT)
+                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_mirror_count:  ",
+                                    " ");
+               llapi_printf(LLAPI_MSG_NORMAL, "%u\n",
+                            comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ?
+                            comp_v1->lcm_mirror_count + 1 : 1);
+       }
        if (verbose & VERBOSE_COMP_COUNT) {
                if (verbose & ~VERBOSE_COMP_COUNT)
-                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count: ",
+                       llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count:   ",
                                     " ");
                llapi_printf(LLAPI_MSG_NORMAL, "%u\n",
                             comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ?
                llapi_printf(LLAPI_MSG_NORMAL, "components:\n");
  }
  
- static void comp_flags2str(__u32 comp_flags)
+ static void lcme_flags2str(__u32 comp_flags)
  {
        bool found = false;
        int i = 0;
@@@ -2678,7 -2710,7 +2710,7 @@@ static void lov_dump_comp_v1_entry(stru
                if (verbose & ~VERBOSE_COMP_FLAGS)
                        llapi_printf(LLAPI_MSG_NORMAL,
                                     "%4slcme_flags:          ", " ");
-               comp_flags2str(entry->lcme_flags);
+               lcme_flags2str(entry->lcme_flags);
                separator = "\n";
        }
  
@@@ -2853,7 -2885,7 +2885,7 @@@ static int find_comp_end_cmp(unsigned l
   *     lmm_fid:           [0x200000401:0x1:0x0]
   *     lmm_stripe_count:  1
   *     lmm_stripe_size:   1048576
-  *     lmm_pattern:       1
+  *     lmm_pattern:       raid0
   *     lmm_layout_gen:    0
   *     lmm_stripe_offset: 0
   *     lmm_objects:
   *     lmm_fid:           [0x200000401:0x1:0x0]
   *     lmm_stripe_count:  2
   *     lmm_stripe_size:   1048576
-  *     lmm_pattern:       1
+  *     lmm_pattern:       raid0
   *     lmm_layout_gen:    0
   *     lmm_stripe_offset: 1
   *     lmm_objects:
@@@ -4491,7 -4523,7 +4523,7 @@@ static int get_mdtname(char *name, cha
                  } else {
                          /* Not enough room to add suffix */
                          llapi_err_noerrno(LLAPI_MSG_ERROR,
 -                                          "MDT name too long |%s|", name);
 +                                        "Invalid MDT name |%s|", name);
                          return -EINVAL;
                  }
          }
@@@ -4551,8 -4583,6 +4583,8 @@@ int llapi_fid2path(const char *device, 
        const char *fidstr_orig = fidstr;
        struct lu_fid fid;
        struct getinfo_fid2path *gf;
 +      char *a;
 +      char *b;
        int rc;
  
        while (*fidstr == '[')
        if (rc)
                goto out_free;
  
 -      memcpy(buf, gf->gf_u.gf_path, gf->gf_pathlen);
 +      b = buf;
 +      /* strip out instances of // */
 +      for (a = gf->gf_u.gf_path; *a != '\0'; a++) {
 +              if ((*a == '/') && (*(a + 1) == '/'))
 +                      continue;
 +              *b = *a;
 +              b++;
 +      }
 +      *b = '\0';
 +
        if (buf[0] == '\0') { /* ROOT path */
                buf[0] = '/';
                buf[1] = '\0';
        }
 +
        *recno = gf->gf_recno;
        *linkno = gf->gf_linkno;
  
@@@ -4753,18 -4773,39 +4785,39 @@@ int llapi_get_connect_flags(const char 
   */
  int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags)
  {
-         int rc;
-         struct ioc_data_version idv;
+       int rc;
+       struct ioc_data_version idv;
  
-         idv.idv_flags = flags;
+       idv.idv_flags = (__u32)flags;
  
-         rc = ioctl(fd, LL_IOC_DATA_VERSION, &idv);
-         if (rc)
-                 rc = -errno;
-         else
-                 *data_version = idv.idv_version;
+       rc = ioctl(fd, LL_IOC_DATA_VERSION, &idv);
+       if (rc)
+               rc = -errno;
+       else
+               *data_version = idv.idv_version;
  
-         return rc;
+       return rc;
+ }
+ /*
+  * Fetch the layout version stored on a file's OST objects. The layout
+  * version on OST objects is only set when the file is a mirrored file AND
+  * after the file has been written at least once.
+  *
+  * The value reported is the least layout version found on the objects.
+  *
+  * Returns 0 and fills *layout_version on success, or -errno when the
+  * LL_IOC_DATA_VERSION ioctl fails (in which case *layout_version is left
+  * untouched).
+  */
+ int llapi_get_ost_layout_version(int fd, __u32 *layout_version)
+ {
+       struct ioc_data_version idv = { 0 };
+       if (ioctl(fd, LL_IOC_DATA_VERSION, &idv) != 0)
+               return -errno;
+       *layout_version = idv.idv_layout_version;
+       return 0;
  }
  
  /*
diff --combined lustre/utils/wiretest.c
  #include <string.h>
  
  #include <linux/lustre/lustre_idl.h>
 +#ifdef HAVE_SERVER_SUPPORT
  #include <linux/lustre/lustre_lfsck_user.h>
  #include <linux/lustre/lustre_disk.h>
 +#endif
  
  #define LASSERT(cond) if (!(cond)) { printf("failed " #cond "\n"); ret = 1; }
  #define LASSERTF(cond, fmt, ...) if (!(cond)) { printf("failed '" #cond "'" fmt, ## __VA_ARGS__);ret = 1;}
@@@ -215,7 -213,7 +215,7 @@@ void lustre_assert_wire_constants(void
                 (long long)REINT_RMENTRY);
        LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
                 (long long)REINT_MIGRATE);
-       LASSERTF(REINT_MAX == 10, "found %lld\n",
+       LASSERTF(REINT_MAX == 11, "found %lld\n",
                 (long long)REINT_MAX);
        LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)DISP_IT_EXECD);
                 (long long)(int)offsetof(struct obdo, o_layout));
        LASSERTF((int)sizeof(((struct obdo *)0)->o_layout) == 28, "found %lld\n",
                 (long long)(int)sizeof(((struct obdo *)0)->o_layout));
-       LASSERTF((int)offsetof(struct obdo, o_padding_3) == 164, "found %lld\n",
-                (long long)(int)offsetof(struct obdo, o_padding_3));
-       LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_3) == 4, "found %lld\n",
-                (long long)(int)sizeof(((struct obdo *)0)->o_padding_3));
+       LASSERTF((int)offsetof(struct obdo, o_layout_version) == 164, "found %lld\n",
+                (long long)(int)offsetof(struct obdo, o_layout_version));
+       LASSERTF((int)sizeof(((struct obdo *)0)->o_layout_version) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct obdo *)0)->o_layout_version));
        LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n",
                 (long long)(int)offsetof(struct obdo, o_uid_h));
        LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
        LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
                (unsigned)LCME_FL_INIT);
+       LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n",
+               (unsigned)LCME_FL_NEG);
  
        /* Checks for struct lov_comp_md_v1 */
        LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count));
-       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 16, "found %lld\n",
+       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_mirror_count) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_v1, lcm_mirror_count));
+       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count));
+       LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 18, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1));
-       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 8, "found %lld\n",
+       LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 6, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1));
        LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2));
        LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]));
        CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0));
+       LASSERTF(LCM_FL_NOT_FLR == 0, "found %lld\n",
+                (long long)LCM_FL_NOT_FLR);
+       LASSERTF(LCM_FL_RDONLY == 1, "found %lld\n",
+                (long long)LCM_FL_RDONLY);
+       LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n",
+                (long long)LCM_FL_WRITE_PENDING);
+       LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n",
+                (long long)LCM_FL_SYNC_PENDING);
  
        /* Checks for struct lmv_mds_md_v1 */
        LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
        LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11));
  
+       /* Checks for struct mdt_rec_resync */
+       LASSERTF((int)sizeof(struct mdt_rec_resync) == 136, "found %lld\n",
+                (long long)(int)sizeof(struct mdt_rec_resync));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_opcode) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_opcode));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_opcode) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_opcode));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_cap) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_cap));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_cap) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_cap));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsuid) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_fsuid));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsuid_h) == 12, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_fsuid_h));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid_h) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid_h));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsgid) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_fsgid));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsgid_h) == 20, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_fsgid_h));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid_h) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid_h));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid1) == 24, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid1));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid1_h) == 28, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid1_h));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1_h) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1_h));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid2) == 32, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid2));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid2_h) == 36, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid2_h));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2_h) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2_h));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fid) == 40, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_fid));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fid) == 16, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fid));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding0) == 56, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding0));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding0) == 16, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding0));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding1) == 80, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding1));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding1) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding1));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding2) == 88, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding2));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding2) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding2));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding3) == 96, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding3));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding3) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding3));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding4) == 104, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding4));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding4) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding4));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_bias) == 112, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_bias));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_bias) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_bias));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding5) == 116, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding5));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding5) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding5));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding6) == 120, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding6));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding6) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding6));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding7) == 124, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding7));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding7) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding7));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding8) == 128, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding8));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding8) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding8));
+       LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding9) == 132, "found %lld\n",
+                (long long)(int)offsetof(struct mdt_rec_resync, rs_padding9));
+       LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding9) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding9));
        /* Checks for struct mdt_rec_reint */
        LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n",
                 (long long)(int)sizeof(struct mdt_rec_reint));
                 (long long)(int)offsetof(struct layout_intent, li_flags));
        LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct layout_intent *)0)->li_flags));
-       LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n",
-                (long long)(int)offsetof(struct layout_intent, li_start));
-       LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n",
-                (long long)(int)sizeof(((struct layout_intent *)0)->li_start));
-       LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n",
-                (long long)(int)offsetof(struct layout_intent, li_end));
-       LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n",
-                (long long)(int)sizeof(((struct layout_intent *)0)->li_end));
+       LASSERTF((int)offsetof(struct layout_intent, li_extent) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct layout_intent, li_extent));
+       LASSERTF((int)sizeof(((struct layout_intent *)0)->li_extent) == 16, "found %lld\n",
+                (long long)(int)sizeof(((struct layout_intent *)0)->li_extent));
        LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n",
                 (long long)LAYOUT_INTENT_ACCESS);
        LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n",
                 (long long)(int)offsetof(struct lfsck_request, lr_padding_3));
        LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_3) == 8, "found %lld\n",
                 (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3));
 +#ifdef HAVE_SERVER_SUPPORT
        LASSERTF(LFSCK_TYPE_SCRUB == 0x00000000UL, "found 0x%.8xUL\n",
                (unsigned)LFSCK_TYPE_SCRUB);
        LASSERTF(LFSCK_TYPE_LAYOUT == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)LFSCK_TYPE_LAYOUT);
        LASSERTF(LFSCK_TYPE_NAMESPACE == 0x00000004UL, "found 0x%.8xUL\n",
                (unsigned)LFSCK_TYPE_NAMESPACE);
 +#endif
        LASSERTF(LE_LASTID_REBUILDING == 1, "found %lld\n",
                 (long long)LE_LASTID_REBUILDING);
        LASSERTF(LE_LASTID_REBUILT == 2, "found %lld\n",