Whamcloud - gitweb
LU-10070 lod: SEL: Implement basic spillover space
[fs/lustre-release.git] / lustre / utils / liblustreapi_layout.c
index 531e083..ffee3fc 100644 (file)
@@ -34,6 +34,7 @@
 #include <unistd.h>
 #include <errno.h>
 #include <limits.h>
+#include <assert.h>
 #include <sys/xattr.h>
 #include <sys/param.h>
 
@@ -59,6 +60,7 @@ struct llapi_layout_comp {
        struct lu_extent        llc_extent;     /* [start, end) of component */
        uint32_t                llc_id;         /* unique ID of component */
        uint32_t                llc_flags;      /* LCME_FL_* flags */
+       uint64_t                llc_timestamp;  /* snapshot timestamp */
        struct list_head        llc_list;       /* linked to the llapi_layout
                                                   components list */
 };
@@ -147,6 +149,7 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
                        ent = &comp_v1->lcm_entries[i];
                        __swab32s(&ent->lcme_id);
                        __swab32s(&ent->lcme_flags);
+                       __swab64s(&ent->lcme_timestamp);
                        __swab64s(&ent->lcme_extent.e_start);
                        __swab64s(&ent->lcme_extent.e_end);
                        __swab32s(&ent->lcme_offset);
@@ -354,28 +357,134 @@ struct llapi_layout *llapi_layout_alloc(void)
 }
 
 /**
+ * Check if the given \a lum_size is large enough to hold the required
+ * fields in \a lum.
+ *
+ * \param[in] lum      the struct lov_user_md to check
+ * \param[in] lum_size the number of bytes in \a lum
+ *
+ * \retval true                the \a lum_size is too small
+ * \retval false       the \a lum_size is large enough
+ */
+static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
+{
+       uint32_t magic;
+
+       if (lum_size < sizeof(lum->lmm_magic))
+               return true;
+
+       if (lum->lmm_magic == LOV_MAGIC_V1 ||
+           lum->lmm_magic == __swab32(LOV_MAGIC_V1))
+               magic = LOV_MAGIC_V1;
+       else if (lum->lmm_magic == LOV_MAGIC_V3 ||
+                lum->lmm_magic == __swab32(LOV_MAGIC_V3))
+               magic = LOV_MAGIC_V3;
+       else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
+                lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
+               magic = LOV_MAGIC_COMP_V1;
+       else
+               return true;
+
+       if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
+               return lum_size < lov_user_md_size(0, magic);
+       else
+               return lum_size < sizeof(struct lov_comp_md_v1);
+}
+
+/**
+ * Verify that the object count of every layout blob in \a lum is
+ * consistent with what the layout declares.  It applies to regular files
+ * only.
+ *
+ * A plain LOV_MAGIC_V1/LOV_MAGIC_V3 layout must hold exactly
+ * lmm_stripe_count objects.  In a LOV_MAGIC_COMP_V1 layout a component
+ * without LCME_FL_INIT must hold no objects.  The entry table and each
+ * component's lcme_offset/lcme_size are range-checked against \a lum_size
+ * before they are followed, because they are read from the xattr itself.
+ *
+ * \param[in] lum      layout to check; only native-endian magic values
+ *                     are recognised
+ * \param[in] lum_size the number of bytes in \a lum
+ *
+ * \retval true        the layout passed every check
+ * \retval false       unknown magic, component outside the buffer, or
+ *                     object count mismatch
+ */
+static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
+{
+       struct lov_comp_md_v1 *comp_v1 = NULL;
+       size_t buf_size;
+       int i, ent_count, obj_count;
+
+       if (lum_size < 0)
+               return false;
+       /* lum_size is reused per component below; keep the total here. */
+       buf_size = lum_size;
+
+       if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
+               comp_v1 = (struct lov_comp_md_v1 *)lum;
+
+               /* The header must be complete before lcm_entry_count is
+                * read, and the whole entry table must fit as well. */
+               if (buf_size < sizeof(*comp_v1))
+                       return false;
+               ent_count = comp_v1->lcm_entry_count;
+               if ((buf_size - sizeof(*comp_v1)) /
+                   sizeof(comp_v1->lcm_entries[0]) < (size_t)ent_count)
+                       return false;
+       } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
+                  lum->lmm_magic == LOV_MAGIC_V3) {
+               ent_count = 1;
+       } else {
+               return false;
+       }
+
+       for (i = 0; i < ent_count; i++) {
+               if (comp_v1) {
+                       uint64_t off = comp_v1->lcm_entries[i].lcme_offset;
+                       uint64_t len = comp_v1->lcm_entries[i].lcme_size;
+
+                       /* Reject a blob that starts or ends past the
+                        * end of the buffer. */
+                       if (off > buf_size || len > buf_size - off)
+                               return false;
+
+                       lum = (struct lov_user_md *)((char *)comp_v1 + off);
+                       lum_size = (int)len;
+               }
+               obj_count = llapi_layout_objects_in_lum(lum, lum_size);
+
+               if (comp_v1) {
+                       /* An uninitialized component carries no objects. */
+                       if (!(comp_v1->lcm_entries[i].lcme_flags &
+                                LCME_FL_INIT) && obj_count != 0)
+                               return false;
+               } else if (obj_count != lum->lmm_stripe_count) {
+                       return false;
+               }
+       }
+       return true;
+}
+
+/**
  * Convert the data from a lov_user_md to a newly allocated llapi_layout.
  * The caller is responsible for freeing the returned pointer.
  *
- * \param[in] lum      LOV user metadata structure to copy data from
- * \param[in] lum_size size the the lum passed in
+ * \param[in] lov_xattr                LOV user metadata xattr to copy data from
+ * \param[in] lov_xattr_size   size in bytes of the \a lov_xattr buffer
+ * \param[in] flags            bitwise-or'd flags to control the behavior
  *
  * \retval             valid llapi_layout pointer on success
  * \retval             NULL if memory allocation fails
  */
-static struct llapi_layout *
-llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
+struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
+                                              ssize_t lov_xattr_size,
+                                              uint32_t flags)
 {
+       struct lov_user_md *lum = lov_xattr;
        struct lov_comp_md_v1 *comp_v1 = NULL;
        struct lov_comp_md_entry_v1 *ent;
        struct lov_user_md *v1;
-       struct llapi_layout *layout;
+       struct llapi_layout *layout = NULL;
        struct llapi_layout_comp *comp;
        int i, ent_count = 0, obj_count;
 
-       layout = __llapi_layout_alloc();
-       if (layout == NULL)
+       if (lov_xattr == NULL || lov_xattr_size <= 0) {
+               errno = EINVAL;
+               return NULL;
+       }
+
+       /* Return an error if we got back a partial layout. */
+       if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
+               errno = ERANGE;
                return NULL;
+       }
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+       if (flags & LLAPI_LXF_COPY) {
+               lum = malloc(lov_xattr_size);
+               if (lum == NULL) {
+                       errno = ENOMEM;
+                       return NULL;
+               }
+               memcpy(lum, lov_xattr, lov_xattr_size);
+       }
+#endif
+
+       llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
+
+       if ((flags & LLAPI_LXF_CHECK) &&
+           !llapi_layout_lum_valid(lum, lov_xattr_size)) {
+               errno = EBADSLT;
+               goto out;
+       }
+
+       layout = __llapi_layout_alloc();
+       if (layout == NULL) {
+               errno = ENOMEM;
+               goto out;
+       }
 
        if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
                comp_v1 = (struct lov_comp_md_v1 *)lum;
@@ -389,11 +498,19 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
                   lum->lmm_magic == LOV_MAGIC_V3) {
                ent_count = 1;
                layout->llot_is_composite = false;
+
+               if (lov_xattr_size <= 0) {
+                       errno = EINVAL;
+                       goto out_layout;
+               }
+       } else {
+               errno = EOPNOTSUPP;
+               goto out_layout;
        }
 
        if (ent_count == 0) {
                errno = EINVAL;
-               goto error;
+               goto out_layout;
        }
 
        v1 = (struct lov_user_md *)lum;
@@ -402,21 +519,23 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
                        ent = &comp_v1->lcm_entries[i];
                        v1 = (struct lov_user_md *)((char *)comp_v1 +
                                ent->lcme_offset);
-                       lum_size = ent->lcme_size;
+                       lov_xattr_size = ent->lcme_size;
                } else {
                        ent = NULL;
                }
 
-               obj_count = llapi_layout_objects_in_lum(v1, lum_size);
+               obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
                comp = __llapi_comp_alloc(obj_count);
                if (comp == NULL)
-                       goto error;
+                       goto out_layout;
 
                if (ent != NULL) {
                        comp->llc_extent.e_start = ent->lcme_extent.e_start;
                        comp->llc_extent.e_end = ent->lcme_extent.e_end;
                        comp->llc_id = ent->lcme_id;
                        comp->llc_flags = ent->lcme_flags;
+                       if (comp->llc_flags & LCME_FL_NOSYNC)
+                               comp->llc_timestamp = ent->lcme_timestamp;
                } else {
                        comp->llc_extent.e_start = 0;
                        comp->llc_extent.e_end = LUSTRE_EOF;
@@ -426,6 +545,9 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
 
                if (v1->lmm_pattern == LOV_PATTERN_RAID0)
                        comp->llc_pattern = LLAPI_LAYOUT_RAID0;
+               else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
+                                        LOV_PATTERN_OVERSTRIPING))
+                       comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING;
                else
                        /* Lustre only supports RAID0 for now. */
                        comp->llc_pattern = v1->lmm_pattern;
@@ -471,10 +593,38 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
                layout->llot_cur_comp = comp;
        }
 
+out:
+       if (lum != lov_xattr)
+               free(lum);
        return layout;
-error:
+out_layout:
        llapi_layout_free(layout);
-       return NULL;
+       layout = NULL;
+       goto out;
+}
+
+/**
+ * Translate an llapi layout pattern into the matching LOV_PATTERN_* value.
+ *
+ * \param[in] pattern  LLAPI_LAYOUT_DEFAULT, LLAPI_LAYOUT_RAID0,
+ *                     LLAPI_LAYOUT_MDT or LLAPI_LAYOUT_OVERSTRIPING
+ *
+ * \retval             LOV pattern bits for a recognised \a pattern
+ * \retval EINVAL      \a pattern is not recognised; this is the positive
+ *                     errno value returned in-band, errno is not set here
+ */
+__u32 llapi_pattern_to_lov(uint64_t pattern)
+{
+       switch (pattern) {
+       case LLAPI_LAYOUT_DEFAULT:
+       case LLAPI_LAYOUT_RAID0:
+               /* The default pattern maps to plain RAID0. */
+               return LOV_PATTERN_RAID0;
+       case LLAPI_LAYOUT_MDT:
+               return LOV_PATTERN_MDT;
+       case LLAPI_LAYOUT_OVERSTRIPING:
+               return LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
+       default:
+               return EINVAL;
+       }
 }
 
 /**
@@ -571,12 +721,11 @@ llapi_layout_to_lum(const struct llapi_layout *layout)
                }
 
                blob->lmm_magic = magic;
-               if (pattern == LLAPI_LAYOUT_DEFAULT)
-                       blob->lmm_pattern = LOV_PATTERN_RAID0;
-               else if (pattern == LLAPI_LAYOUT_MDT)
-                       blob->lmm_pattern = LOV_PATTERN_MDT;
-               else
-                       blob->lmm_pattern = pattern;
+               blob->lmm_pattern = llapi_pattern_to_lov(pattern);
+               if (blob->lmm_pattern == EINVAL) {
+                       errno = EINVAL;
+                       goto error;
+               }
 
                if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
                        blob->lmm_stripe_size = 0;
@@ -621,6 +770,8 @@ llapi_layout_to_lum(const struct llapi_layout *layout)
                        ent = &comp_v1->lcm_entries[ent_idx];
                        ent->lcme_id = comp->llc_id;
                        ent->lcme_flags = comp->llc_flags;
+                       if (ent->lcme_flags & LCME_FL_NOSYNC)
+                               ent->lcme_timestamp = comp->llc_timestamp;
                        ent->lcme_extent.e_start = comp->llc_extent.e_start;
                        ent->lcme_extent.e_end = comp->llc_extent.e_end;
                        ent->lcme_size = blob_size;
@@ -650,7 +801,7 @@ static void get_parent_dir(const char *path, char *buf, size_t size)
 {
        char *p;
 
-       strncpy(buf, path, size);
+       strncpy(buf, path, size - 1);
        p = strrchr(buf, '/');
 
        if (p != NULL) {
@@ -748,77 +899,6 @@ static bool is_any_specified(const struct llapi_layout *layout)
 }
 
 /**
- * Check if the given \a lum_size is large enough to hold the required
- * fields in \a lum.
- *
- * \param[in] lum      the struct lov_user_md to check
- * \param[in] lum_size the number of bytes in \a lum
- *
- * \retval true                the \a lum_size is too small
- * \retval false       the \a lum_size is large enough
- */
-static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
-{
-       uint32_t magic;
-
-       if (lum_size < sizeof(lum->lmm_magic))
-               return true;
-
-       if (lum->lmm_magic == LOV_MAGIC_V1 ||
-           lum->lmm_magic == __swab32(LOV_MAGIC_V1))
-               magic = LOV_MAGIC_V1;
-       else if (lum->lmm_magic == LOV_MAGIC_V3 ||
-                lum->lmm_magic == __swab32(LOV_MAGIC_V3))
-               magic = LOV_MAGIC_V3;
-       else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
-                lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
-               magic = LOV_MAGIC_COMP_V1;
-       else
-               return true;
-
-       if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
-               return lum_size < lov_user_md_size(0, magic);
-       else
-               return lum_size < sizeof(struct lov_comp_md_v1);
-}
-
-/* Verify if the objects count in lum is consistent with the
- * stripe count in lum. It applies to regular file only. */
-static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
-{
-       struct lov_comp_md_v1 *comp_v1 = NULL;
-       int i, ent_count, obj_count;
-
-       if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
-               comp_v1 = (struct lov_comp_md_v1 *)lum;
-               ent_count = comp_v1->lcm_entry_count;
-       } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
-                  lum->lmm_magic == LOV_MAGIC_V3) {
-               ent_count = 1;
-       } else {
-               return false;
-       }
-
-       for (i = 0; i < ent_count; i++) {
-               if (comp_v1) {
-                       lum = (struct lov_user_md *)((char *)comp_v1 +
-                               comp_v1->lcm_entries[i].lcme_offset);
-                       lum_size = comp_v1->lcm_entries[i].lcme_size;
-               }
-               obj_count = llapi_layout_objects_in_lum(lum, lum_size);
-
-               if (comp_v1) {
-                       if (!(comp_v1->lcm_entries[i].lcme_flags &
-                                LCME_FL_INIT) && obj_count != 0)
-                               return false;
-               } else if (obj_count != lum->lmm_stripe_count) {
-                       return false;
-               }
-       }
-       return true;
-}
-
-/**
  * Get the striping layout for the file referenced by file descriptor \a fd.
  *
  * If the filesystem does not support the "lustre." xattr namespace, the
@@ -857,14 +937,6 @@ struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags)
                goto out;
        }
 
-       /* Return an error if we got back a partial layout. */
-       if (llapi_layout_lum_truncated(lum, bytes_read)) {
-               errno = EINTR;
-               goto out;
-       }
-
-       llapi_layout_swab_lov_user_md(lum, bytes_read);
-
        /* Directories may have a positive non-zero lum->lmm_stripe_count
         * yet have an empty lum->lmm_objects array. For non-directories the
         * amount of data returned from the kernel must be consistent
@@ -872,12 +944,8 @@ struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags)
        if (fstat(fd, &st) < 0)
                goto out;
 
-       if (!S_ISDIR(st.st_mode) && !llapi_layout_lum_valid(lum, bytes_read)) {
-               errno = EINTR;
-               goto out;
-       }
-
-       layout = llapi_layout_from_lum(lum, bytes_read);
+       layout = llapi_layout_get_by_xattr(lum, bytes_read,
+               S_ISDIR(st.st_mode) ? 0 : LLAPI_LXF_CHECK);
 out:
        free(lum);
        return layout;
@@ -1084,6 +1152,13 @@ static bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
                 llapi_stripe_count_is_valid(stripe_count));
 }
 
+/* An extension size is accepted when it is non-zero, passes the stripe
+ * size alignment check and is not reported as too big. */
+static bool llapi_layout_extension_size_is_valid(uint64_t ext_size)
+{
+       if (ext_size == 0)
+               return false;
+
+       if (!llapi_stripe_size_is_aligned(ext_size))
+               return false;
+
+       return !llapi_stripe_size_is_too_big(ext_size);
+}
+
 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
 {
        return stripe_size == LLAPI_LAYOUT_DEFAULT ||
@@ -1128,18 +1203,20 @@ int llapi_layout_stripe_count_set(struct llapi_layout *layout,
 }
 
 /**
- * Get the stripe size of \a layout.
+ * Get the stripe/extension size of \a layout.
  *
  * \param[in] layout   layout to get stripe size from
  * \param[out] size    integer to store stripe size in
+ * \param[in] extension flag if extension size is requested
  *
  * \retval     0 on success
  * \retval     -1 if arguments are invalid
  */
-int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
-                                uint64_t *size)
+static int layout_stripe_size_get(const struct llapi_layout *layout,
+                                 uint64_t *size, bool extension)
 {
        struct llapi_layout_comp *comp;
+       int comp_ext;
 
        comp = __llapi_layout_cur_comp(layout);
        if (comp == NULL)
@@ -1150,39 +1227,82 @@ int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
                return -1;
        }
 
+       comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
+       if ((comp_ext && !extension) || (!comp_ext && extension)) {
+               errno = EINVAL;
+               return -1;
+       }
+
        *size = comp->llc_stripe_size;
+       if (comp->llc_flags & LCME_FL_EXTENSION)
+               *size *= SEL_UNIT_SIZE;
 
        return 0;
 }
 
+/**
+ * Get the stripe size of the current component of \a layout.
+ *
+ * Calls layout_stripe_size_get() with the extension flag cleared.
+ *
+ * \param[in] layout   layout to get stripe size from
+ * \param[out] size    integer to store stripe size in
+ *
+ * \retval             the value returned by layout_stripe_size_get()
+ */
+int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
+                                uint64_t *size)
+{
+       const bool want_extension = false;
+
+       return layout_stripe_size_get(layout, size, want_extension);
+}
+
+/**
+ * Get the extension size of the current component of \a layout.
+ *
+ * Calls layout_stripe_size_get() with the extension flag set.
+ *
+ * \param[in] layout   layout to get extension size from
+ * \param[out] size    integer to store extension size in
+ *
+ * \retval             the value returned by layout_stripe_size_get()
+ */
+int llapi_layout_extension_size_get(const struct llapi_layout *layout,
+                                   uint64_t *size)
+{
+       const bool want_extension = true;
+
+       return layout_stripe_size_get(layout, size, want_extension);
+}
+
 /**
- * Set the stripe size of \a layout.
+ * Set the stripe/extension size of \a layout.
  *
  * \param[in] layout   layout to set stripe size in
  * \param[in] size     value to be set
+ * \param[in] extension flag if extension size is passed
  *
  * \retval     0 on success
  * \retval     -1 if arguments are invalid
  */
-int llapi_layout_stripe_size_set(struct llapi_layout *layout,
-                                uint64_t size)
+static int layout_stripe_size_set(struct llapi_layout *layout,
+                                 uint64_t size, bool extension)
 {
        struct llapi_layout_comp *comp;
+       int comp_ext;
 
        comp = __llapi_layout_cur_comp(layout);
        if (comp == NULL)
                return -1;
 
-       if (!llapi_layout_stripe_size_is_valid(size)) {
+       comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
+       if ((comp_ext && !extension) || (!comp_ext && extension)) {
                errno = EINVAL;
                return -1;
        }
 
-       comp->llc_stripe_size = size;
+       if (comp_ext)
+               size /= SEL_UNIT_SIZE;
+
+       if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) ||
+           (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) {
+               errno = EINVAL;
+               return -1;
+       }
 
+       comp->llc_stripe_size = size;
        return 0;
 }
 
+/**
+ * Set the stripe size of the current component of \a layout.
+ *
+ * Calls layout_stripe_size_set() with the extension flag cleared.
+ *
+ * \param[in] layout   layout to set stripe size in
+ * \param[in] size     value to be set
+ *
+ * \retval             the value returned by layout_stripe_size_set()
+ */
+int llapi_layout_stripe_size_set(struct llapi_layout *layout,
+                                uint64_t size)
+{
+       const bool is_extension = false;
+
+       return layout_stripe_size_set(layout, size, is_extension);
+}
+
+/**
+ * Set the extension size of the current component of \a layout.
+ *
+ * Calls layout_stripe_size_set() with the extension flag set.
+ *
+ * \param[in] layout   layout to set extension size in
+ * \param[in] size     value to be set
+ *
+ * \retval             the value returned by layout_stripe_size_set()
+ */
+int llapi_layout_extension_size_set(struct llapi_layout *layout,
+                                   uint64_t size)
+{
+       const bool is_extension = true;
+
+       return layout_stripe_size_set(layout, size, is_extension);
+}
+
 /**
  * Get the RAID pattern of \a layout.
  *
@@ -1230,7 +1350,8 @@ int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
                return -1;
 
        if (pattern != LLAPI_LAYOUT_DEFAULT &&
-           pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT) {
+           pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
+           && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
                errno = EOPNOTSUPP;
                return -1;
        }
@@ -1430,7 +1551,7 @@ int llapi_layout_pool_name_set(struct llapi_layout *layout,
  *
  * \param[in] path             name of the file to open
  * \param[in] open_flags       open() flags
- * \param[in] mode             permissions to create new file with
+ * \param[in] mode             permissions to create file, filtered by umask
  * \param[in] layout           layout to create new file with
  *
  * \retval             non-negative file descriptor on successful open
@@ -1538,6 +1659,32 @@ int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
        return 0;
 }
 
+/**
+ * Give the short name for the state selected from \a flags by
+ * LCM_FL_FLR_MASK.  Bits outside the mask are ignored.
+ *
+ * \param[in] flags    layout flags
+ *
+ * \retval "ro"        the masked value is LCM_FL_RDONLY
+ * \retval "wp"        the masked value is LCM_FL_WRITE_PENDING
+ * \retval "sp"        the masked value is LCM_FL_SYNC_PENDING
+ * \retval "0"         any other masked value
+ */
+const char *llapi_layout_flags_string(uint32_t flags)
+{
+       const uint32_t masked = flags & LCM_FL_FLR_MASK;
+
+       if (masked == LCM_FL_RDONLY)
+               return "ro";
+       if (masked == LCM_FL_WRITE_PENDING)
+               return "wp";
+       if (masked == LCM_FL_SYNC_PENDING)
+               return "sp";
+
+       return "0";
+}
+
+/**
+ * Convert a short state name into the matching LCM_FL_* layout flag.
+ *
+ * The name must match exactly.  A compare limited to strlen(\a string)
+ * bytes would treat the empty string, and any prefix such as "r", as a
+ * match for "ro", so a full strcmp() is used instead.
+ *
+ * \param[in] string   NUL-terminated name: "ro", "wp" or "sp"
+ *
+ * \retval LCM_FL_RDONLY               \a string is "ro"
+ * \retval LCM_FL_WRITE_PENDING        \a string is "wp"
+ * \retval LCM_FL_SYNC_PENDING         \a string is "sp"
+ * \retval 0                           \a string is NULL or not recognised
+ */
+const __u16 llapi_layout_string_flags(char *string)
+{
+       if (string == NULL)
+               return 0;
+
+       if (strcmp(string, "ro") == 0)
+               return LCM_FL_RDONLY;
+       if (strcmp(string, "wp") == 0)
+               return LCM_FL_WRITE_PENDING;
+       if (strcmp(string, "sp") == 0)
+               return LCM_FL_SYNC_PENDING;
+
+       return 0;
+}
+
 /**
  * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
  * @count: Mirror count value to be checked.
@@ -1648,7 +1795,7 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout,
        if (comp == NULL)
                return -1;
 
-       if (start >= end) {
+       if (start > end) {
                errno = EINVAL;
                return -1;
        }
@@ -1660,7 +1807,7 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout,
        if (comp->llc_list.prev != &layout->llot_comp_list) {
                prev = list_entry(comp->llc_list.prev, typeof(*prev),
                                  llc_list);
-               if (start != prev->llc_extent.e_end) {
+               if (start != 0 && start != prev->llc_extent.e_end) {
                        errno = EINVAL;
                        return -1;
                }
@@ -1669,7 +1816,8 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout,
        if (comp->llc_list.next != &layout->llot_comp_list) {
                next = list_entry(comp->llc_list.next, typeof(*next),
                                  llc_list);
-               if (end != next->llc_extent.e_start) {
+               if (next->llc_extent.e_start != 0 &&
+                   end != next->llc_extent.e_start) {
                        errno = EINVAL;
                        return -1;
                }
@@ -1848,6 +1996,37 @@ int llapi_layout_comp_add(struct llapi_layout *layout)
 
        return 0;
 }
+/**
+ * Add the first component of a mirror to \a layout.
+ *
+ * A new component starting at offset 0 is appended to the end of the
+ * component list.  It becomes the current component of \a layout, and
+ * \a layout is marked as composite whether or not it was before.
+ *
+ * \param[in] layout           existing composite or plain layout
+ *
+ * \retval     0 on success
+ * \retval     -1 if \a layout has no current component or the new
+ *             component cannot be allocated
+ */
+int llapi_layout_add_first_comp(struct llapi_layout *layout)
+{
+       struct llapi_layout_comp *first;
+
+       /* The current component is only checked for existence. */
+       if (__llapi_layout_cur_comp(layout) == NULL)
+               return -1;
+
+       first = __llapi_comp_alloc(0);
+       if (first == NULL)
+               return -1;
+
+       first->llc_extent.e_start = 0;
+       list_add_tail(&first->llc_list, &layout->llot_comp_list);
+
+       layout->llot_cur_comp = first;
+       layout->llot_is_composite = true;
+
+       return 0;
+}
 
 /**
  * Deletes current component from the composite layout. The component
@@ -2231,7 +2410,13 @@ bool llapi_layout_is_composite(struct llapi_layout *layout)
 /**
  * Iterate every components in the @layout and call callback function @cb.
  *
- * \param[in]
+ * \param[in] layout   component layout list.
+ * \param[in] cb       callback for each component
+ * \param[in] cbdata   callback data
+ *
+ * \retval < 0                         error happens during the iteration
+ * \retval LLAPI_LAYOUT_ITER_CONT      finished the iteration w/o error
+ * \retval LLAPI_LAYOUT_ITER_STOP      got something, stop the iteration
  */
 int llapi_layout_comp_iterate(struct llapi_layout *layout,
                              llapi_layout_iter_cb cb, void *cbdata)
@@ -2242,7 +2427,13 @@ int llapi_layout_comp_iterate(struct llapi_layout *layout,
        if (rc < 0)
                return rc;
 
-       while (rc == 0) {
+       /**
+        * make sure on success llapi_layout_comp_use() API returns 0 with
+        * USE_FIRST.
+        */
+       assert(rc == 0);
+
+       while (1) {
                rc = cb(layout, cbdata);
                if (rc != LLAPI_LAYOUT_ITER_CONT)
                        break;
@@ -2250,9 +2441,11 @@ int llapi_layout_comp_iterate(struct llapi_layout *layout,
                rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
                if (rc < 0)
                        return rc;
+               else if (rc == 1)       /* reached the last comp */
+                       return LLAPI_LAYOUT_ITER_CONT;
        }
 
-       return rc >= 0 ? LLAPI_LAYOUT_ITER_CONT : rc;
+       return rc;
 }
 
 /**
@@ -2384,6 +2577,10 @@ int llapi_mirror_find_stale(struct llapi_layout *layout,
                        /* not in the specified mirror */
                        if (j == ids_nr)
                                goto next;
+               } else if (flags & LCME_FL_NOSYNC) {
+                       /* if not specified mirrors, do not resync "nosync"
+                        * mirrors */
+                       goto next;
                }
 
                rc = llapi_layout_comp_id_get(layout, &id);
@@ -2418,9 +2615,9 @@ error:
 }
 
 /* locate @layout to a valid component covering file [file_start, file_end) */
-static uint32_t llapi_mirror_find(struct llapi_layout *layout,
-                                 uint64_t file_start, uint64_t file_end,
-                                 uint64_t *endp)
+uint32_t llapi_mirror_find(struct llapi_layout *layout,
+                          uint64_t file_start, uint64_t file_end,
+                          uint64_t *endp)
 {
        uint32_t mirror_id = 0;
        int rc;
@@ -2473,12 +2670,21 @@ static uint32_t llapi_mirror_find(struct llapi_layout *layout,
        return mirror_id;
 }
 
-ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout,
-                               uint32_t dst, uint64_t start, uint64_t end)
+int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
+                            struct llapi_resync_comp *comp_array,
+                            int comp_size,  uint64_t start, uint64_t end)
 {
-       uint64_t mirror_end = 0;
-       ssize_t result = 0;
-       size_t count;
+       uint64_t count;
+       size_t page_size = sysconf(_SC_PAGESIZE);
+       const size_t buflen = 4 << 20; /* 4M */
+       void *buf;
+       uint64_t pos = start;
+       int i;
+       int rc;
+
+       rc = posix_memalign(&buf, page_size, buflen);
+       if (rc)
+               return -rc;
 
        if (end == OBD_OBJECT_EOF)
                count = OBD_OBJECT_EOF;
@@ -2487,30 +2693,99 @@ ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout,
 
        while (count > 0) {
                uint32_t src;
-               size_t to_copy;
-               ssize_t copied;
+               uint64_t mirror_end = 0;
+               uint64_t bytes_left;
+               ssize_t bytes_read;
+               size_t to_read;
+               size_t to_write;
 
-               src = llapi_mirror_find(layout, start, end, &mirror_end);
+               src = llapi_mirror_find(layout, pos, end, &mirror_end);
                if (src == 0)
                        return -ENOENT;
 
-               if (mirror_end == OBD_OBJECT_EOF)
-                       to_copy = count;
-               else
-                       to_copy = MIN(count, mirror_end - start);
-
-               copied = llapi_mirror_copy(fd, src, dst, start, to_copy);
-               if (copied < 0)
-                       return copied;
+               if (mirror_end == OBD_OBJECT_EOF) {
+                       bytes_left = count;
+               } else {
+                       bytes_left = MIN(count, mirror_end - pos);
+                       bytes_left = ((bytes_left - 1) | (page_size - 1)) + 1;
+               }
+               to_read = MIN(buflen, bytes_left);
 
-               result += copied;
-               if (copied < to_copy) /* end of file */
+               bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
+               if (bytes_read == 0) {
+                       /* end of file */
+                       break;
+               }
+               if (bytes_read < 0) {
+                       rc = bytes_read;
                        break;
+               }
+
+               /* round up to page align to make direct IO happy. */
+               to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
+
+               for (i = 0; i < comp_size; i++) {
+                       ssize_t written;
+                       off_t pos2 = pos;
+                       size_t to_write2 = to_write;
+
+                       /* skip non-overlapped component */
+                       if (pos >= comp_array[i].lrc_end ||
+                           pos + to_write <= comp_array[i].lrc_start)
+                               continue;
+
+                       if (pos < comp_array[i].lrc_start)
+                               pos2 = comp_array[i].lrc_start;
+
+                       to_write2 -= pos2 - pos;
+
+                       if ((pos + to_write) > comp_array[i].lrc_end)
+                               to_write2 -= pos + to_write -
+                                            comp_array[i].lrc_end;
+
+                       written = llapi_mirror_write(fd,
+                                       comp_array[i].lrc_mirror_id,
+                                       buf + pos2 - pos,
+                                       to_write2, pos2);
+                       if (written < 0) {
+                               /**
+                                * this component is not written successfully,
+                                * mark it using its lrc_synced, it is supposed
+                                * to be false before getting here.
+                                *
+                                * And before this function returns, all
+                                * elements of comp_array will reverse their
+                                * lrc_synced flag to reflect their true
+                                * meanings.
+                                */
+                               comp_array[i].lrc_synced = true;
+                               continue;
+                       }
+                       assert(written == to_write2);
+               }
 
-               if (count != OBD_OBJECT_EOF)
-                       count -= copied;
-               start += copied;
+               pos += bytes_read;
+               count -= bytes_read;
        }
 
-       return result;
+       free(buf);
+
+       if (rc < 0) {
+               for (i = 0; i < comp_size; i++)
+                       comp_array[i].lrc_synced = false;
+               return rc;
+       }
+
+       for (i = 0; i < comp_size; i++) {
+               comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
+               if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
+                       rc = llapi_mirror_truncate(fd,
+                                       comp_array[i].lrc_mirror_id, pos);
+                       if (rc < 0)
+                               comp_array[i].lrc_synced = false;
+               }
+       }
+
+       /* partially successful is successful */
+       return 0;
 }