X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fliblustreapi_layout.c;h=ffee3fca8a24120f913123c62877831b12e0f19f;hp=531e0835564280160bec34a906e902c046dbfc05;hb=ff5eb304fa371d879da38621fac3aec7d4548a5e;hpb=aa1646a8a1c2a91a1571147994229b6413ee7acc diff --git a/lustre/utils/liblustreapi_layout.c b/lustre/utils/liblustreapi_layout.c index 531e083..ffee3fc 100644 --- a/lustre/utils/liblustreapi_layout.c +++ b/lustre/utils/liblustreapi_layout.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,7 @@ struct llapi_layout_comp { struct lu_extent llc_extent; /* [start, end) of component */ uint32_t llc_id; /* unique ID of component */ uint32_t llc_flags; /* LCME_FL_* flags */ + uint64_t llc_timestamp; /* snapshot timestamp */ struct list_head llc_list; /* linked to the llapi_layout components list */ }; @@ -147,6 +149,7 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size) ent = &comp_v1->lcm_entries[i]; __swab32s(&ent->lcme_id); __swab32s(&ent->lcme_flags); + __swab64s(&ent->lcme_timestamp); __swab64s(&ent->lcme_extent.e_start); __swab64s(&ent->lcme_extent.e_end); __swab32s(&ent->lcme_offset); @@ -354,28 +357,134 @@ struct llapi_layout *llapi_layout_alloc(void) } /** + * Check if the given \a lum_size is large enough to hold the required + * fields in \a lum. + * + * \param[in] lum the struct lov_user_md to check + * \param[in] lum_size the number of bytes in \a lum + * + * \retval true the \a lum_size is too small + * \retval false the \a lum_size is large enough + */ +static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size) +{ + uint32_t magic; + + if (lum_size < sizeof(lum->lmm_magic)) + return true; + + if (lum->lmm_magic == LOV_MAGIC_V1 || + lum->lmm_magic == __swab32(LOV_MAGIC_V1)) + magic = LOV_MAGIC_V1; + else if (lum->lmm_magic == LOV_MAGIC_V3 || + lum->lmm_magic == __swab32(LOV_MAGIC_V3)) + magic = LOV_MAGIC_V3; + else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 || + lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1)) + magic = LOV_MAGIC_COMP_V1; + else + return true; + + if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3) + return lum_size < lov_user_md_size(0, magic); + else + return lum_size < sizeof(struct lov_comp_md_v1); +} + +/* Verify if the objects count in lum is consistent with the + * stripe count in lum. It applies to regular file only. */ +static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size) +{ + struct lov_comp_md_v1 *comp_v1 = NULL; + int i, ent_count, obj_count; + + if (lum->lmm_magic == LOV_MAGIC_COMP_V1) { + comp_v1 = (struct lov_comp_md_v1 *)lum; + ent_count = comp_v1->lcm_entry_count; + } else if (lum->lmm_magic == LOV_MAGIC_V1 || + lum->lmm_magic == LOV_MAGIC_V3) { + ent_count = 1; + } else { + return false; + } + + for (i = 0; i < ent_count; i++) { + if (comp_v1) { + lum = (struct lov_user_md *)((char *)comp_v1 + + comp_v1->lcm_entries[i].lcme_offset); + lum_size = comp_v1->lcm_entries[i].lcme_size; + } + obj_count = llapi_layout_objects_in_lum(lum, lum_size); + + if (comp_v1) { + if (!(comp_v1->lcm_entries[i].lcme_flags & + LCME_FL_INIT) && obj_count != 0) + return false; + } else if (obj_count != lum->lmm_stripe_count) { + return false; + } + } + return true; +} + +/** * Convert the data from a lov_user_md to a newly allocated llapi_layout. * The caller is responsible for freeing the returned pointer. * - * \param[in] lum LOV user metadata structure to copy data from - * \param[in] lum_size size the the lum passed in + * \param[in] lov_xattr LOV user metadata xattr to copy data from + * \param[in] lov_xattr_size size the lov_xattr_size passed in + * \param[in] flags bitwise-or'd flags to control the behavior * * \retval valid llapi_layout pointer on success * \retval NULL if memory allocation fails */ -static struct llapi_layout * -llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) +struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr, + ssize_t lov_xattr_size, + uint32_t flags) { + struct lov_user_md *lum = lov_xattr; struct lov_comp_md_v1 *comp_v1 = NULL; struct lov_comp_md_entry_v1 *ent; struct lov_user_md *v1; - struct llapi_layout *layout; + struct llapi_layout *layout = NULL; struct llapi_layout_comp *comp; int i, ent_count = 0, obj_count; - layout = __llapi_layout_alloc(); - if (layout == NULL) + if (lov_xattr == NULL || lov_xattr_size <= 0) { + errno = EINVAL; + return NULL; + } + + /* Return an error if we got back a partial layout. */ + if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) { + errno = ERANGE; return NULL; + } + +#if __BYTE_ORDER == __BIG_ENDIAN + if (flags & LLAPI_LXF_COPY) { + lum = malloc(lov_xattr_size); + if (lum == NULL) { + errno = ENOMEM; + return NULL; + } + memcpy(lum, lov_xattr, lov_xattr_size); + } +#endif + + llapi_layout_swab_lov_user_md(lum, lov_xattr_size); + + if ((flags & LLAPI_LXF_CHECK) && + !llapi_layout_lum_valid(lum, lov_xattr_size)) { + errno = EBADSLT; + goto out; + } + + layout = __llapi_layout_alloc(); + if (layout == NULL) { + errno = ENOMEM; + goto out; + } if (lum->lmm_magic == LOV_MAGIC_COMP_V1) { comp_v1 = (struct lov_comp_md_v1 *)lum; @@ -389,11 +498,19 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) lum->lmm_magic == LOV_MAGIC_V3) { ent_count = 1; layout->llot_is_composite = false; + + if (lov_xattr_size <= 0) { + errno = EINVAL; + goto out_layout; + } + } else { + errno = EOPNOTSUPP; + goto out_layout; } if (ent_count == 0) { errno = EINVAL; - goto error; + goto out_layout; } v1 = (struct lov_user_md *)lum; @@ -402,21 +519,23 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) ent = &comp_v1->lcm_entries[i]; v1 = (struct lov_user_md *)((char *)comp_v1 + ent->lcme_offset); - lum_size = ent->lcme_size; + lov_xattr_size = ent->lcme_size; } else { ent = NULL; } - obj_count = llapi_layout_objects_in_lum(v1, lum_size); + obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size); comp = __llapi_comp_alloc(obj_count); if (comp == NULL) - goto error; + goto out_layout; if (ent != NULL) { comp->llc_extent.e_start = ent->lcme_extent.e_start; comp->llc_extent.e_end = ent->lcme_extent.e_end; comp->llc_id = ent->lcme_id; comp->llc_flags = ent->lcme_flags; + if (comp->llc_flags & LCME_FL_NOSYNC) + comp->llc_timestamp = ent->lcme_timestamp; } else { comp->llc_extent.e_start = 0; comp->llc_extent.e_end = LUSTRE_EOF; @@ -426,6 +545,9 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) if (v1->lmm_pattern == LOV_PATTERN_RAID0) comp->llc_pattern = LLAPI_LAYOUT_RAID0; + else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 | + LOV_PATTERN_OVERSTRIPING)) + comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING; else /* Lustre only supports RAID0 for now. */ comp->llc_pattern = v1->lmm_pattern; @@ -471,10 +593,38 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) layout->llot_cur_comp = comp; } +out: + if (lum != lov_xattr) + free(lum); return layout; -error: +out_layout: llapi_layout_free(layout); - return NULL; + layout = NULL; + goto out; +} + +__u32 llapi_pattern_to_lov(uint64_t pattern) +{ + __u32 lov_pattern; + + switch (pattern) { + case LLAPI_LAYOUT_DEFAULT: + lov_pattern = LOV_PATTERN_RAID0; + break; + case LLAPI_LAYOUT_RAID0: + lov_pattern = LOV_PATTERN_RAID0; + break; + case LLAPI_LAYOUT_MDT: + lov_pattern = LOV_PATTERN_MDT; + break; + case LLAPI_LAYOUT_OVERSTRIPING: + lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0; + break; + default: + lov_pattern = EINVAL; + } + + return lov_pattern; } /** @@ -571,12 +721,11 @@ llapi_layout_to_lum(const struct llapi_layout *layout) } blob->lmm_magic = magic; - if (pattern == LLAPI_LAYOUT_DEFAULT) - blob->lmm_pattern = LOV_PATTERN_RAID0; - else if (pattern == LLAPI_LAYOUT_MDT) - blob->lmm_pattern = LOV_PATTERN_MDT; - else - blob->lmm_pattern = pattern; + blob->lmm_pattern = llapi_pattern_to_lov(pattern); + if (blob->lmm_pattern == EINVAL) { + errno = EINVAL; + goto error; + } if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT) blob->lmm_stripe_size = 0; @@ -621,6 +770,8 @@ llapi_layout_to_lum(const struct llapi_layout *layout) ent = &comp_v1->lcm_entries[ent_idx]; ent->lcme_id = comp->llc_id; ent->lcme_flags = comp->llc_flags; + if (ent->lcme_flags & LCME_FL_NOSYNC) + ent->lcme_timestamp = comp->llc_timestamp; ent->lcme_extent.e_start = comp->llc_extent.e_start; ent->lcme_extent.e_end = comp->llc_extent.e_end; ent->lcme_size = blob_size; @@ -650,7 +801,7 @@ static void get_parent_dir(const char *path, char *buf, size_t size) { char *p; - strncpy(buf, path, size); + strncpy(buf, path, size - 1); p = strrchr(buf, '/'); if (p != NULL) { @@ -748,77 +899,6 @@ static bool is_any_specified(const struct llapi_layout *layout) } /** - * Check if the given \a lum_size is large enough to hold the required - * fields in \a lum. - * - * \param[in] lum the struct lov_user_md to check - * \param[in] lum_size the number of bytes in \a lum - * - * \retval true the \a lum_size is too small - * \retval false the \a lum_size is large enough - */ -static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size) -{ - uint32_t magic; - - if (lum_size < sizeof(lum->lmm_magic)) - return true; - - if (lum->lmm_magic == LOV_MAGIC_V1 || - lum->lmm_magic == __swab32(LOV_MAGIC_V1)) - magic = LOV_MAGIC_V1; - else if (lum->lmm_magic == LOV_MAGIC_V3 || - lum->lmm_magic == __swab32(LOV_MAGIC_V3)) - magic = LOV_MAGIC_V3; - else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 || - lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1)) - magic = LOV_MAGIC_COMP_V1; - else - return true; - - if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3) - return lum_size < lov_user_md_size(0, magic); - else - return lum_size < sizeof(struct lov_comp_md_v1); -} - -/* Verify if the objects count in lum is consistent with the - * stripe count in lum. It applies to regular file only. */ -static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size) -{ - struct lov_comp_md_v1 *comp_v1 = NULL; - int i, ent_count, obj_count; - - if (lum->lmm_magic == LOV_MAGIC_COMP_V1) { - comp_v1 = (struct lov_comp_md_v1 *)lum; - ent_count = comp_v1->lcm_entry_count; - } else if (lum->lmm_magic == LOV_MAGIC_V1 || - lum->lmm_magic == LOV_MAGIC_V3) { - ent_count = 1; - } else { - return false; - } - - for (i = 0; i < ent_count; i++) { - if (comp_v1) { - lum = (struct lov_user_md *)((char *)comp_v1 + - comp_v1->lcm_entries[i].lcme_offset); - lum_size = comp_v1->lcm_entries[i].lcme_size; - } - obj_count = llapi_layout_objects_in_lum(lum, lum_size); - - if (comp_v1) { - if (!(comp_v1->lcm_entries[i].lcme_flags & - LCME_FL_INIT) && obj_count != 0) - return false; - } else if (obj_count != lum->lmm_stripe_count) { - return false; - } - } - return true; -} - -/** * Get the striping layout for the file referenced by file descriptor \a fd. * * If the filesystem does not support the "lustre." xattr namespace, the @@ -857,14 +937,6 @@ struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags) goto out; } - /* Return an error if we got back a partial layout. */ - if (llapi_layout_lum_truncated(lum, bytes_read)) { - errno = EINTR; - goto out; - } - - llapi_layout_swab_lov_user_md(lum, bytes_read); - /* Directories may have a positive non-zero lum->lmm_stripe_count * yet have an empty lum->lmm_objects array. For non-directories the * amount of data returned from the kernel must be consistent @@ -872,12 +944,8 @@ struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags) if (fstat(fd, &st) < 0) goto out; - if (!S_ISDIR(st.st_mode) && !llapi_layout_lum_valid(lum, bytes_read)) { - errno = EINTR; - goto out; - } - - layout = llapi_layout_from_lum(lum, bytes_read); + layout = llapi_layout_get_by_xattr(lum, bytes_read, + S_ISDIR(st.st_mode) ? 0 : LLAPI_LXF_CHECK); out: free(lum); return layout; @@ -1084,6 +1152,13 @@ static bool llapi_layout_stripe_count_is_valid(int64_t stripe_count) llapi_stripe_count_is_valid(stripe_count)); } +static bool llapi_layout_extension_size_is_valid(uint64_t ext_size) +{ + return (ext_size != 0 && + llapi_stripe_size_is_aligned(ext_size) && + !llapi_stripe_size_is_too_big(ext_size)); +} + static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size) { return stripe_size == LLAPI_LAYOUT_DEFAULT || @@ -1128,18 +1203,20 @@ int llapi_layout_stripe_count_set(struct llapi_layout *layout, } /** - * Get the stripe size of \a layout. + * Get the stripe/extension size of \a layout. * * \param[in] layout layout to get stripe size from * \param[out] size integer to store stripe size in + * \param[in] extension flag if extenion size is requested * * \retval 0 on success * \retval -1 if arguments are invalid */ -int llapi_layout_stripe_size_get(const struct llapi_layout *layout, - uint64_t *size) +static int layout_stripe_size_get(const struct llapi_layout *layout, + uint64_t *size, bool extension) { struct llapi_layout_comp *comp; + int comp_ext; comp = __llapi_layout_cur_comp(layout); if (comp == NULL) @@ -1150,39 +1227,82 @@ int llapi_layout_stripe_size_get(const struct llapi_layout *layout, return -1; } + comp_ext = comp->llc_flags & LCME_FL_EXTENSION; + if ((comp_ext && !extension) || (!comp_ext && extension)) { + errno = EINVAL; + return -1; + } + *size = comp->llc_stripe_size; + if (comp->llc_flags & LCME_FL_EXTENSION) + *size *= SEL_UNIT_SIZE; return 0; } +int llapi_layout_stripe_size_get(const struct llapi_layout *layout, + uint64_t *size) +{ + return layout_stripe_size_get(layout, size, false); +} + +int llapi_layout_extension_size_get(const struct llapi_layout *layout, + uint64_t *size) +{ + return layout_stripe_size_get(layout, size, true); +} + /** - * Set the stripe size of \a layout. + * Set the stripe/extension size of \a layout. * * \param[in] layout layout to set stripe size in * \param[in] size value to be set + * \param[in] extension flag if extenion size is passed * * \retval 0 on success * \retval -1 if arguments are invalid */ -int llapi_layout_stripe_size_set(struct llapi_layout *layout, - uint64_t size) +static int layout_stripe_size_set(struct llapi_layout *layout, + uint64_t size, bool extension) { struct llapi_layout_comp *comp; + int comp_ext; comp = __llapi_layout_cur_comp(layout); if (comp == NULL) return -1; - if (!llapi_layout_stripe_size_is_valid(size)) { + comp_ext = comp->llc_flags & LCME_FL_EXTENSION; + if ((comp_ext && !extension) || (!comp_ext && extension)) { errno = EINVAL; return -1; } - comp->llc_stripe_size = size; + if (comp_ext) + size /= SEL_UNIT_SIZE; + + if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) || + (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) { + errno = EINVAL; + return -1; + } + comp->llc_stripe_size = size; return 0; } +int llapi_layout_stripe_size_set(struct llapi_layout *layout, + uint64_t size) +{ + return layout_stripe_size_set(layout, size, false); +} + +int llapi_layout_extension_size_set(struct llapi_layout *layout, + uint64_t size) +{ + return layout_stripe_size_set(layout, size, true); +} + /** * Get the RAID pattern of \a layout. * @@ -1230,7 +1350,8 @@ int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern) return -1; if (pattern != LLAPI_LAYOUT_DEFAULT && - pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT) { + pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT + && pattern != LLAPI_LAYOUT_OVERSTRIPING) { errno = EOPNOTSUPP; return -1; } @@ -1430,7 +1551,7 @@ int llapi_layout_pool_name_set(struct llapi_layout *layout, * * \param[in] path name of the file to open * \param[in] open_flags open() flags - * \param[in] mode permissions to create new file with + * \param[in] mode permissions to create file, filtered by umask * \param[in] layout layout to create new file with * * \retval non-negative file descriptor on successful open @@ -1538,6 +1659,32 @@ int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags) return 0; } +const char *llapi_layout_flags_string(uint32_t flags) +{ + switch (flags & LCM_FL_FLR_MASK) { + case LCM_FL_RDONLY: + return "ro"; + case LCM_FL_WRITE_PENDING: + return "wp"; + case LCM_FL_SYNC_PENDING: + return "sp"; + } + + return "0"; +} + +const __u16 llapi_layout_string_flags(char *string) +{ + if (strncmp(string, "ro", strlen(string)) == 0) + return LCM_FL_RDONLY; + if (strncmp(string, "wp", strlen(string)) == 0) + return LCM_FL_WRITE_PENDING; + if (strncmp(string, "sp", strlen(string)) == 0) + return LCM_FL_SYNC_PENDING; + + return 0; +} + /** * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count. * @count: Mirror count value to be checked. @@ -1648,7 +1795,7 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout, if (comp == NULL) return -1; - if (start >= end) { + if (start > end) { errno = EINVAL; return -1; } @@ -1660,7 +1807,7 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout, if (comp->llc_list.prev != &layout->llot_comp_list) { prev = list_entry(comp->llc_list.prev, typeof(*prev), llc_list); - if (start != prev->llc_extent.e_end) { + if (start != 0 && start != prev->llc_extent.e_end) { errno = EINVAL; return -1; } @@ -1669,7 +1816,8 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout, if (comp->llc_list.next != &layout->llot_comp_list) { next = list_entry(comp->llc_list.next, typeof(*next), llc_list); - if (end != next->llc_extent.e_start) { + if (next->llc_extent.e_start != 0 && + end != next->llc_extent.e_start) { errno = EINVAL; return -1; } @@ -1848,6 +1996,37 @@ int llapi_layout_comp_add(struct llapi_layout *layout) return 0; } +/** + * Adds a first component of a mirror to \a layout. + * The \a layout will change it's current component pointer to + * the newly added component, and it'll be turned into a composite + * layout if it was not before the adding. + * + * \param[in] layout existing composite or plain layout + * + * \retval 0 on success + * \retval <0 if error occurs + */ +int llapi_layout_add_first_comp(struct llapi_layout *layout) +{ + struct llapi_layout_comp *comp, *new; + + comp = __llapi_layout_cur_comp(layout); + if (comp == NULL) + return -1; + + new = __llapi_comp_alloc(0); + if (new == NULL) + return -1; + + new->llc_extent.e_start = 0; + + list_add_tail(&new->llc_list, &layout->llot_comp_list); + layout->llot_cur_comp = new; + layout->llot_is_composite = true; + + return 0; +} /** * Deletes current component from the composite layout. The component @@ -2231,7 +2410,13 @@ bool llapi_layout_is_composite(struct llapi_layout *layout) /** * Iterate every components in the @layout and call callback function @cb. * - * \param[in] + * \param[in] layout component layout list. + * \param[in] cb callback for each component + * \param[in] cbdata callback data + * + * \retval < 0 error happens during the iteration + * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error + * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration */ int llapi_layout_comp_iterate(struct llapi_layout *layout, llapi_layout_iter_cb cb, void *cbdata) @@ -2242,7 +2427,13 @@ int llapi_layout_comp_iterate(struct llapi_layout *layout, if (rc < 0) return rc; - while (rc == 0) { + /** + * make sure on success llapi_layout_comp_use() API returns 0 with + * USE_FIRST. + */ + assert(rc == 0); + + while (1) { rc = cb(layout, cbdata); if (rc != LLAPI_LAYOUT_ITER_CONT) break; @@ -2250,9 +2441,11 @@ int llapi_layout_comp_iterate(struct llapi_layout *layout, rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT); if (rc < 0) return rc; + else if (rc == 1) /* reached the last comp */ + return LLAPI_LAYOUT_ITER_CONT; } - return rc >= 0 ? LLAPI_LAYOUT_ITER_CONT : rc; + return rc; } /** @@ -2384,6 +2577,10 @@ int llapi_mirror_find_stale(struct llapi_layout *layout, /* not in the specified mirror */ if (j == ids_nr) goto next; + } else if (flags & LCME_FL_NOSYNC) { + /* if not specified mirrors, do not resync "nosync" + * mirrors */ + goto next; } rc = llapi_layout_comp_id_get(layout, &id); @@ -2418,9 +2615,9 @@ error: } /* locate @layout to a valid component covering file [file_start, file_end) */ -static uint32_t llapi_mirror_find(struct llapi_layout *layout, - uint64_t file_start, uint64_t file_end, - uint64_t *endp) +uint32_t llapi_mirror_find(struct llapi_layout *layout, + uint64_t file_start, uint64_t file_end, + uint64_t *endp) { uint32_t mirror_id = 0; int rc; @@ -2473,12 +2670,21 @@ static uint32_t llapi_mirror_find(struct llapi_layout *layout, return mirror_id; } -ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout, - uint32_t dst, uint64_t start, uint64_t end) +int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, + struct llapi_resync_comp *comp_array, + int comp_size, uint64_t start, uint64_t end) { - uint64_t mirror_end = 0; - ssize_t result = 0; - size_t count; + uint64_t count; + size_t page_size = sysconf(_SC_PAGESIZE); + const size_t buflen = 4 << 20; /* 4M */ + void *buf; + uint64_t pos = start; + int i; + int rc; + + rc = posix_memalign(&buf, page_size, buflen); + if (rc) + return -rc; if (end == OBD_OBJECT_EOF) count = OBD_OBJECT_EOF; @@ -2487,30 +2693,99 @@ ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout, while (count > 0) { uint32_t src; - size_t to_copy; - ssize_t copied; + uint64_t mirror_end = 0; + uint64_t bytes_left; + ssize_t bytes_read; + size_t to_read; + size_t to_write; - src = llapi_mirror_find(layout, start, end, &mirror_end); + src = llapi_mirror_find(layout, pos, end, &mirror_end); if (src == 0) return -ENOENT; - if (mirror_end == OBD_OBJECT_EOF) - to_copy = count; - else - to_copy = MIN(count, mirror_end - start); - - copied = llapi_mirror_copy(fd, src, dst, start, to_copy); - if (copied < 0) - return copied; + if (mirror_end == OBD_OBJECT_EOF) { + bytes_left = count; + } else { + bytes_left = MIN(count, mirror_end - pos); + bytes_left = ((bytes_left - 1) | (page_size - 1)) + 1; + } + to_read = MIN(buflen, bytes_left); - result += copied; - if (copied < to_copy) /* end of file */ + bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos); + if (bytes_read == 0) { + /* end of file */ + break; + } + if (bytes_read < 0) { + rc = bytes_read; break; + } + + /* round up to page align to make direct IO happy. */ + to_write = ((bytes_read - 1) | (page_size - 1)) + 1; + + for (i = 0; i < comp_size; i++) { + ssize_t written; + off_t pos2 = pos; + size_t to_write2 = to_write; + + /* skip non-overlapped component */ + if (pos >= comp_array[i].lrc_end || + pos + to_write <= comp_array[i].lrc_start) + continue; + + if (pos < comp_array[i].lrc_start) + pos2 = comp_array[i].lrc_start; + + to_write2 -= pos2 - pos; + + if ((pos + to_write) > comp_array[i].lrc_end) + to_write2 -= pos + to_write - + comp_array[i].lrc_end; + + written = llapi_mirror_write(fd, + comp_array[i].lrc_mirror_id, + buf + pos2 - pos, + to_write2, pos2); + if (written < 0) { + /** + * this component is not written successfully, + * mark it using its lrc_synced, it is supposed + * to be false before getting here. + * + * And before this function returns, all + * elements of comp_array will reverse their + * lrc_synced flag to reflect their true + * meanings. + */ + comp_array[i].lrc_synced = true; + continue; + } + assert(written == to_write2); + } - if (count != OBD_OBJECT_EOF) - count -= copied; - start += copied; + pos += bytes_read; + count -= bytes_read; } - return result; + free(buf); + + if (rc < 0) { + for (i = 0; i < comp_size; i++) + comp_array[i].lrc_synced = false; + return rc; + } + + for (i = 0; i < comp_size; i++) { + comp_array[i].lrc_synced = !comp_array[i].lrc_synced; + if (comp_array[i].lrc_synced && pos & (page_size - 1)) { + rc = llapi_mirror_truncate(fd, + comp_array[i].lrc_mirror_id, pos); + if (rc < 0) + comp_array[i].lrc_synced = false; + } + } + + /* partially successful is successful */ + return 0; }