X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Fliblustreapi_layout.c;h=aca20cc2b9ed3784d534a8cd3111a74bd084bb19;hb=90ee0457c9fb1da939558186961f346c917d678f;hp=f206c9667aa784e53985d2edb973310b9cc6cff2;hpb=fa15e5347c2eccd432924e57440829606c7339f6;p=fs%2Flustre-release.git diff --git a/lustre/utils/liblustreapi_layout.c b/lustre/utils/liblustreapi_layout.c index f206c96..aca20cc 100644 --- a/lustre/utils/liblustreapi_layout.c +++ b/lustre/utils/liblustreapi_layout.c @@ -60,8 +60,10 @@ struct llapi_layout_comp { struct lu_extent llc_extent; /* [start, end) of component */ uint32_t llc_id; /* unique ID of component */ uint32_t llc_flags; /* LCME_FL_* flags */ + uint64_t llc_timestamp; /* snapshot timestamp */ struct list_head llc_list; /* linked to the llapi_layout components list */ + bool llc_ondisk; }; /** @@ -133,11 +135,11 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size) comp_v1 = (struct lov_comp_md_v1 *)lum; if (comp_v1 != NULL) { - __swab32s(&comp_v1->lcm_magic); - __swab32s(&comp_v1->lcm_size); - __swab32s(&comp_v1->lcm_layout_gen); - __swab16s(&comp_v1->lcm_flags); - __swab16s(&comp_v1->lcm_entry_count); + comp_v1->lcm_magic = __swab32(comp_v1->lcm_magic); + comp_v1->lcm_size = __swab32(comp_v1->lcm_size); + comp_v1->lcm_layout_gen = __swab32(comp_v1->lcm_layout_gen); + comp_v1->lcm_flags = __swab16(comp_v1->lcm_flags); + comp_v1->lcm_entry_count = __swab16(comp_v1->lcm_entry_count); ent_count = comp_v1->lcm_entry_count; } else { ent_count = 1; @@ -146,12 +148,13 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size) for (i = 0; i < ent_count; i++) { if (comp_v1 != NULL) { ent = &comp_v1->lcm_entries[i]; - __swab32s(&ent->lcme_id); - __swab32s(&ent->lcme_flags); - __swab64s(&ent->lcme_extent.e_start); - __swab64s(&ent->lcme_extent.e_end); - __swab32s(&ent->lcme_offset); - __swab32s(&ent->lcme_size); + ent->lcme_id = __swab32(ent->lcme_id); + ent->lcme_flags = __swab32(ent->lcme_flags); + 
ent->lcme_timestamp = __swab64(ent->lcme_timestamp); + ent->lcme_extent.e_start = __swab64(ent->lcme_extent.e_start); + ent->lcme_extent.e_end = __swab64(ent->lcme_extent.e_end); + ent->lcme_offset = __swab32(ent->lcme_offset); + ent->lcme_size = __swab32(ent->lcme_size); lum = (struct lov_user_md *)((char *)comp_v1 + ent->lcme_offset); @@ -159,11 +162,11 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size) } obj_count = llapi_layout_objects_in_lum(lum, lum_size); - __swab32s(&lum->lmm_magic); - __swab32s(&lum->lmm_pattern); - __swab32s(&lum->lmm_stripe_size); - __swab16s(&lum->lmm_stripe_count); - __swab16s(&lum->lmm_stripe_offset); + lum->lmm_magic = __swab32(lum->lmm_magic); + lum->lmm_pattern = __swab32(lum->lmm_pattern); + lum->lmm_stripe_size = __swab32(lum->lmm_stripe_size); + lum->lmm_stripe_count = __swab16(lum->lmm_stripe_count); + lum->lmm_stripe_offset = __swab16(lum->lmm_stripe_offset); if (lum->lmm_magic != LOV_MAGIC_V1) { struct lov_user_md_v3 *v3; @@ -174,7 +177,7 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size) } for (j = 0; j < obj_count; j++) - __swab32s(&lod[j].l_ost_idx); + lod[j].l_ost_idx = __swab32(lod[j].l_ost_idx); } } @@ -355,28 +358,139 @@ struct llapi_layout *llapi_layout_alloc(void) } /** + * Check if the given \a lum_size is large enough to hold the required + * fields in \a lum. 
+ * + * \param[in] lum the struct lov_user_md to check + * \param[in] lum_size the number of bytes in \a lum + * + * \retval true the \a lum_size is too small + * \retval false the \a lum_size is large enough + */ +static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size) +{ + uint32_t magic; + + if (lum_size < sizeof(lum->lmm_magic)) + return true; + + if (lum->lmm_magic == LOV_MAGIC_V1 || + lum->lmm_magic == __swab32(LOV_MAGIC_V1)) + magic = LOV_MAGIC_V1; + else if (lum->lmm_magic == LOV_MAGIC_V3 || + lum->lmm_magic == __swab32(LOV_MAGIC_V3)) + magic = LOV_MAGIC_V3; + else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 || + lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1)) + magic = LOV_MAGIC_COMP_V1; + else + return true; + + if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3) + return lum_size < lov_user_md_size(0, magic); + else + return lum_size < sizeof(struct lov_comp_md_v1); +} + +/* Verify if the objects count in lum is consistent with the + * stripe count in lum. It applies to regular file only. */ +static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size) +{ + struct lov_comp_md_v1 *comp_v1 = NULL; + int i, ent_count, obj_count; + + if (lum->lmm_magic == LOV_MAGIC_COMP_V1) { + comp_v1 = (struct lov_comp_md_v1 *)lum; + ent_count = comp_v1->lcm_entry_count; + } else if (lum->lmm_magic == LOV_MAGIC_V1 || + lum->lmm_magic == LOV_MAGIC_V3) { + ent_count = 1; + } else { + return false; + } + + for (i = 0; i < ent_count; i++) { + if (comp_v1) { + lum = (struct lov_user_md *)((char *)comp_v1 + + comp_v1->lcm_entries[i].lcme_offset); + lum_size = comp_v1->lcm_entries[i].lcme_size; + } + obj_count = llapi_layout_objects_in_lum(lum, lum_size); + + if (comp_v1) { + if (!(comp_v1->lcm_entries[i].lcme_flags & + LCME_FL_INIT) && obj_count != 0) + return false; + } else if (obj_count != lum->lmm_stripe_count) { + return false; + } + } + return true; +} + +/** * Convert the data from a lov_user_md to a newly allocated llapi_layout. 
* The caller is responsible for freeing the returned pointer. * - * \param[in] lum LOV user metadata structure to copy data from - * \param[in] lum_size size the the lum passed in + * \param[in] lov_xattr LOV user metadata xattr to copy data from + * \param[in] lov_xattr_size size the lov_xattr_size passed in + * \param[in] flags flags to control how layout is retrieved * * \retval valid llapi_layout pointer on success * \retval NULL if memory allocation fails */ -static struct llapi_layout * -llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) +struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr, + ssize_t lov_xattr_size, + enum llapi_layout_get_flags flags) { + struct lov_user_md *lum = lov_xattr; struct lov_comp_md_v1 *comp_v1 = NULL; struct lov_comp_md_entry_v1 *ent; struct lov_user_md *v1; - struct llapi_layout *layout; + struct llapi_layout *layout = NULL; struct llapi_layout_comp *comp; int i, ent_count = 0, obj_count; - layout = __llapi_layout_alloc(); - if (layout == NULL) + if (lov_xattr == NULL || lov_xattr_size <= 0) { + errno = EINVAL; + return NULL; + } + + /* Return an error if we got back a partial layout. 
*/ + if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) { + errno = ERANGE; return NULL; + } + +#if __BYTE_ORDER == __BIG_ENDIAN + if (flags & LLAPI_LAYOUT_GET_COPY) { + lum = malloc(lov_xattr_size); + if (lum == NULL) { + errno = ENOMEM; + return NULL; + } + memcpy(lum, lov_xattr, lov_xattr_size); + } +#endif + + llapi_layout_swab_lov_user_md(lum, lov_xattr_size); + +#if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 16, 53, 0) +#define LLAPI_LXF_CHECK_OLD 0x0001 + if (flags & LLAPI_LXF_CHECK_OLD) + flags = (flags & ~LLAPI_LXF_CHECK_OLD) | LLAPI_LAYOUT_GET_CHECK; +#endif + if ((flags & LLAPI_LAYOUT_GET_CHECK) && + !llapi_layout_lum_valid(lum, lov_xattr_size)) { + errno = EBADSLT; + goto out; + } + + layout = __llapi_layout_alloc(); + if (layout == NULL) { + errno = ENOMEM; + goto out; + } if (lum->lmm_magic == LOV_MAGIC_COMP_V1) { comp_v1 = (struct lov_comp_md_v1 *)lum; @@ -390,11 +504,19 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) lum->lmm_magic == LOV_MAGIC_V3) { ent_count = 1; layout->llot_is_composite = false; + + if (lov_xattr_size <= 0) { + errno = EINVAL; + goto out_layout; + } + } else { + errno = EOPNOTSUPP; + goto out_layout; } if (ent_count == 0) { errno = EINVAL; - goto error; + goto out_layout; } v1 = (struct lov_user_md *)lum; @@ -403,21 +525,23 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) ent = &comp_v1->lcm_entries[i]; v1 = (struct lov_user_md *)((char *)comp_v1 + ent->lcme_offset); - lum_size = ent->lcme_size; + lov_xattr_size = ent->lcme_size; } else { ent = NULL; } - obj_count = llapi_layout_objects_in_lum(v1, lum_size); + obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size); comp = __llapi_comp_alloc(obj_count); if (comp == NULL) - goto error; + goto out_layout; if (ent != NULL) { comp->llc_extent.e_start = ent->lcme_extent.e_start; comp->llc_extent.e_end = ent->lcme_extent.e_end; comp->llc_id = ent->lcme_id; comp->llc_flags = ent->lcme_flags; + if (comp->llc_flags & LCME_FL_NOSYNC) + 
comp->llc_timestamp = ent->lcme_timestamp; } else { comp->llc_extent.e_start = 0; comp->llc_extent.e_end = LUSTRE_EOF; @@ -427,8 +551,15 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) if (v1->lmm_pattern == LOV_PATTERN_RAID0) comp->llc_pattern = LLAPI_LAYOUT_RAID0; + else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 | + LOV_PATTERN_OVERSTRIPING)) + comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING; + else if (v1->lmm_pattern == LOV_PATTERN_MDT) + comp->llc_pattern = LLAPI_LAYOUT_MDT; else - /* Lustre only supports RAID0 for now. */ + /* Lustre only supports RAID0, overstripping + * and DoM for now. + */ comp->llc_pattern = v1->lmm_pattern; if (v1->lmm_stripe_size == 0) @@ -468,14 +599,43 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size) comp->llc_stripe_offset = comp->llc_objects[0].l_ost_idx; + comp->llc_ondisk = true; list_add_tail(&comp->llc_list, &layout->llot_comp_list); layout->llot_cur_comp = comp; } +out: + if (lum != lov_xattr) + free(lum); return layout; -error: +out_layout: llapi_layout_free(layout); - return NULL; + layout = NULL; + goto out; +} + +__u32 llapi_pattern_to_lov(uint64_t pattern) +{ + __u32 lov_pattern; + + switch (pattern) { + case LLAPI_LAYOUT_DEFAULT: + lov_pattern = LOV_PATTERN_RAID0; + break; + case LLAPI_LAYOUT_RAID0: + lov_pattern = LOV_PATTERN_RAID0; + break; + case LLAPI_LAYOUT_MDT: + lov_pattern = LOV_PATTERN_MDT; + break; + case LLAPI_LAYOUT_OVERSTRIPING: + lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0; + break; + default: + lov_pattern = EINVAL; + } + + return lov_pattern; } /** @@ -572,12 +732,11 @@ llapi_layout_to_lum(const struct llapi_layout *layout) } blob->lmm_magic = magic; - if (pattern == LLAPI_LAYOUT_DEFAULT) - blob->lmm_pattern = LOV_PATTERN_RAID0; - else if (pattern == LLAPI_LAYOUT_MDT) - blob->lmm_pattern = LOV_PATTERN_MDT; - else - blob->lmm_pattern = pattern; + blob->lmm_pattern = llapi_pattern_to_lov(pattern); + if (blob->lmm_pattern == EINVAL) { + errno = EINVAL; 
+ goto error; + } if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT) blob->lmm_stripe_size = 0; @@ -622,6 +781,8 @@ llapi_layout_to_lum(const struct llapi_layout *layout) ent = &comp_v1->lcm_entries[ent_idx]; ent->lcme_id = comp->llc_id; ent->lcme_flags = comp->llc_flags; + if (ent->lcme_flags & LCME_FL_NOSYNC) + ent->lcme_timestamp = comp->llc_timestamp; ent->lcme_extent.e_start = comp->llc_extent.e_start; ent->lcme_extent.e_end = comp->llc_extent.e_end; ent->lcme_size = blob_size; @@ -651,7 +812,7 @@ static void get_parent_dir(const char *path, char *buf, size_t size) { char *p; - strncpy(buf, path, size); + strncpy(buf, path, size - 1); p = strrchr(buf, '/'); if (p != NULL) { @@ -749,77 +910,6 @@ static bool is_any_specified(const struct llapi_layout *layout) } /** - * Check if the given \a lum_size is large enough to hold the required - * fields in \a lum. - * - * \param[in] lum the struct lov_user_md to check - * \param[in] lum_size the number of bytes in \a lum - * - * \retval true the \a lum_size is too small - * \retval false the \a lum_size is large enough - */ -static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size) -{ - uint32_t magic; - - if (lum_size < sizeof(lum->lmm_magic)) - return true; - - if (lum->lmm_magic == LOV_MAGIC_V1 || - lum->lmm_magic == __swab32(LOV_MAGIC_V1)) - magic = LOV_MAGIC_V1; - else if (lum->lmm_magic == LOV_MAGIC_V3 || - lum->lmm_magic == __swab32(LOV_MAGIC_V3)) - magic = LOV_MAGIC_V3; - else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 || - lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1)) - magic = LOV_MAGIC_COMP_V1; - else - return true; - - if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3) - return lum_size < lov_user_md_size(0, magic); - else - return lum_size < sizeof(struct lov_comp_md_v1); -} - -/* Verify if the objects count in lum is consistent with the - * stripe count in lum. It applies to regular file only. 
*/ -static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size) -{ - struct lov_comp_md_v1 *comp_v1 = NULL; - int i, ent_count, obj_count; - - if (lum->lmm_magic == LOV_MAGIC_COMP_V1) { - comp_v1 = (struct lov_comp_md_v1 *)lum; - ent_count = comp_v1->lcm_entry_count; - } else if (lum->lmm_magic == LOV_MAGIC_V1 || - lum->lmm_magic == LOV_MAGIC_V3) { - ent_count = 1; - } else { - return false; - } - - for (i = 0; i < ent_count; i++) { - if (comp_v1) { - lum = (struct lov_user_md *)((char *)comp_v1 + - comp_v1->lcm_entries[i].lcme_offset); - lum_size = comp_v1->lcm_entries[i].lcme_size; - } - obj_count = llapi_layout_objects_in_lum(lum, lum_size); - - if (comp_v1) { - if (!(comp_v1->lcm_entries[i].lcme_flags & - LCME_FL_INIT) && obj_count != 0) - return false; - } else if (obj_count != lum->lmm_stripe_count) { - return false; - } - } - return true; -} - -/** * Get the striping layout for the file referenced by file descriptor \a fd. * * If the filesystem does not support the "lustre." xattr namespace, the @@ -836,7 +926,8 @@ static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size) * \retval valid llapi_layout pointer on success * \retval NULL if an error occurs */ -struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags) +struct llapi_layout *llapi_layout_get_by_fd(int fd, + enum llapi_layout_get_flags flags) { size_t lum_len; struct lov_user_md *lum; @@ -858,14 +949,6 @@ struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags) goto out; } - /* Return an error if we got back a partial layout. */ - if (llapi_layout_lum_truncated(lum, bytes_read)) { - errno = EINTR; - goto out; - } - - llapi_layout_swab_lov_user_md(lum, bytes_read); - /* Directories may have a positive non-zero lum->lmm_stripe_count * yet have an empty lum->lmm_objects array. 
For non-directories the * amount of data returned from the kernel must be consistent @@ -873,12 +956,8 @@ struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags) if (fstat(fd, &st) < 0) goto out; - if (!S_ISDIR(st.st_mode) && !llapi_layout_lum_valid(lum, bytes_read)) { - errno = EINTR; - goto out; - } - - layout = llapi_layout_from_lum(lum, bytes_read); + layout = llapi_layout_get_by_xattr(lum, bytes_read, + S_ISDIR(st.st_mode) ? 0 : LLAPI_LAYOUT_GET_CHECK); out: free(lum); return layout; @@ -976,7 +1055,7 @@ static struct llapi_layout *llapi_layout_expected(const char *path) /** * Get the striping layout for the file at \a path. * - * If \a flags contains LAYOUT_GET_EXPECTED, substitute + * If \a flags contains LLAPI_LAYOUT_GET_EXPECTED, substitute * expected inherited attribute values for unspecified attributes. See * llapi_layout_expected(). * @@ -986,13 +1065,14 @@ static struct llapi_layout *llapi_layout_expected(const char *path) * \retval valid llapi_layout pointer on success * \retval NULL if an error occurs */ -struct llapi_layout *llapi_layout_get_by_path(const char *path, uint32_t flags) +struct llapi_layout *llapi_layout_get_by_path(const char *path, + enum llapi_layout_get_flags flags) { struct llapi_layout *layout = NULL; int fd; int tmp; - if (flags & LAYOUT_GET_EXPECTED) + if (flags & LLAPI_LAYOUT_GET_EXPECTED) return llapi_layout_expected(path); fd = open(path, O_RDONLY); @@ -1018,7 +1098,7 @@ struct llapi_layout *llapi_layout_get_by_path(const char *path, uint32_t flags) */ struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir, const struct lu_fid *fid, - uint32_t flags) + enum llapi_layout_get_flags flags) { int fd; int tmp; @@ -1077,7 +1157,7 @@ int llapi_layout_stripe_count_get(const struct llapi_layout *layout, * the old API uses 0 and -1. 
*/ -static bool llapi_layout_stripe_count_is_valid(int64_t stripe_count) +bool llapi_layout_stripe_count_is_valid(int64_t stripe_count) { return stripe_count == LLAPI_LAYOUT_DEFAULT || stripe_count == LLAPI_LAYOUT_WIDE || @@ -1085,6 +1165,13 @@ static bool llapi_layout_stripe_count_is_valid(int64_t stripe_count) llapi_stripe_count_is_valid(stripe_count)); } +static bool llapi_layout_extension_size_is_valid(uint64_t ext_size) +{ + return (ext_size != 0 && + llapi_stripe_size_is_aligned(ext_size) && + !llapi_stripe_size_is_too_big(ext_size)); +} + static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size) { return stripe_size == LLAPI_LAYOUT_DEFAULT || @@ -1129,18 +1216,20 @@ int llapi_layout_stripe_count_set(struct llapi_layout *layout, } /** - * Get the stripe size of \a layout. + * Get the stripe/extension size of \a layout. * * \param[in] layout layout to get stripe size from * \param[out] size integer to store stripe size in + * \param[in] extension flag if extension size is requested * * \retval 0 on success * \retval -1 if arguments are invalid */ -int llapi_layout_stripe_size_get(const struct llapi_layout *layout, - uint64_t *size) +static int layout_stripe_size_get(const struct llapi_layout *layout, + uint64_t *size, bool extension) { struct llapi_layout_comp *comp; + int comp_ext; comp = __llapi_layout_cur_comp(layout); if (comp == NULL) @@ -1151,39 +1240,82 @@ int llapi_layout_stripe_size_get(const struct llapi_layout *layout, return -1; } + comp_ext = comp->llc_flags & LCME_FL_EXTENSION; + if ((comp_ext && !extension) || (!comp_ext && extension)) { + errno = EINVAL; + return -1; + } + *size = comp->llc_stripe_size; + if (comp->llc_flags & LCME_FL_EXTENSION) + *size *= SEL_UNIT_SIZE; return 0; } +int llapi_layout_stripe_size_get(const struct llapi_layout *layout, + uint64_t *size) +{ + return layout_stripe_size_get(layout, size, false); +} + +int llapi_layout_extension_size_get(const struct llapi_layout *layout, + uint64_t *size) +{ + return 
layout_stripe_size_get(layout, size, true); +} + /** - * Set the stripe size of \a layout. + * Set the stripe/extension size of \a layout. * * \param[in] layout layout to set stripe size in * \param[in] size value to be set + * \param[in] extension flag if extension size is passed * * \retval 0 on success * \retval -1 if arguments are invalid */ -int llapi_layout_stripe_size_set(struct llapi_layout *layout, - uint64_t size) +static int layout_stripe_size_set(struct llapi_layout *layout, + uint64_t size, bool extension) { struct llapi_layout_comp *comp; + int comp_ext; comp = __llapi_layout_cur_comp(layout); if (comp == NULL) return -1; - if (!llapi_layout_stripe_size_is_valid(size)) { + comp_ext = comp->llc_flags & LCME_FL_EXTENSION; + if ((comp_ext && !extension) || (!comp_ext && extension)) { errno = EINVAL; return -1; } - comp->llc_stripe_size = size; + if (comp_ext) + size /= SEL_UNIT_SIZE; + + if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) || + (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) { + errno = EINVAL; + return -1; + } + comp->llc_stripe_size = size; return 0; } +int llapi_layout_stripe_size_set(struct llapi_layout *layout, + uint64_t size) +{ + return layout_stripe_size_set(layout, size, false); +} + +int llapi_layout_extension_size_set(struct llapi_layout *layout, + uint64_t size) +{ + return layout_stripe_size_set(layout, size, true); +} + /** * Get the RAID pattern of \a layout. 
* @@ -1231,7 +1363,8 @@ int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern) return -1; if (pattern != LLAPI_LAYOUT_DEFAULT && - pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT) { + pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT + && pattern != LLAPI_LAYOUT_OVERSTRIPING) { errno = EOPNOTSUPP; return -1; } @@ -1393,32 +1526,20 @@ int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest, * \retval -1 if arguments are invalid or pool name is too long */ int llapi_layout_pool_name_set(struct llapi_layout *layout, - const char *pool_name) + char *pool_name) { struct llapi_layout_comp *comp; - char *ptr; comp = __llapi_layout_cur_comp(layout); if (comp == NULL) return -1; - if (pool_name == NULL) { - errno = EINVAL; - return -1; - } - - /* Strip off any 'fsname.' portion. */ - ptr = strchr(pool_name, '.'); - if (ptr != NULL) - pool_name = ptr + 1; - - if (strlen(pool_name) > LOV_MAXPOOLNAME) { + if (!llapi_pool_name_is_valid(&pool_name, NULL)) { errno = EINVAL; return -1; } strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name)); - return 0; } @@ -1431,7 +1552,7 @@ int llapi_layout_pool_name_set(struct llapi_layout *layout, * * \param[in] path name of the file to open * \param[in] open_flags open() flags - * \param[in] mode permissions to create new file with + * \param[in] mode permissions to create file, filtered by umask * \param[in] layout layout to create new file with * * \retval non-negative file descriptor on successful open @@ -1452,6 +1573,16 @@ int llapi_layout_file_open(const char *path, int open_flags, mode_t mode, return -1; } + if (layout) { + rc = llapi_layout_sanity((struct llapi_layout *)layout, + path, false, + !!(layout->llot_mirror_count > 1)); + if (rc) { + llapi_layout_sanity_perror(rc); + return -1; + } + } + /* Object creation must be postponed until after layout attributes * have been applied. 
*/ if (layout != NULL && (open_flags & O_CREAT)) @@ -1553,6 +1684,18 @@ const char *llapi_layout_flags_string(uint32_t flags) return "0"; } +__u16 llapi_layout_string_flags(char *string) +{ + if (strncmp(string, "ro", strlen(string)) == 0) + return LCM_FL_RDONLY; + if (strncmp(string, "wp", strlen(string)) == 0) + return LCM_FL_WRITE_PENDING; + if (strncmp(string, "sp", strlen(string)) == 0) + return LCM_FL_SYNC_PENDING; + + return 0; +} + /** * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count. * @count: Mirror count value to be checked. @@ -1657,39 +1800,17 @@ int llapi_layout_comp_extent_get(const struct llapi_layout *layout, int llapi_layout_comp_extent_set(struct llapi_layout *layout, uint64_t start, uint64_t end) { - struct llapi_layout_comp *prev, *next, *comp; + struct llapi_layout_comp *comp; comp = __llapi_layout_cur_comp(layout); if (comp == NULL) return -1; - if (start >= end) { + if (start > end) { errno = EINVAL; return -1; } - /* - * We need to make sure the extent to be set is valid: the new - * extent must be adjacent with the prev & next component. 
- */ - if (comp->llc_list.prev != &layout->llot_comp_list) { - prev = list_entry(comp->llc_list.prev, typeof(*prev), - llc_list); - if (start != prev->llc_extent.e_end) { - errno = EINVAL; - return -1; - } - } - - if (comp->llc_list.next != &layout->llot_comp_list) { - next = list_entry(comp->llc_list.next, typeof(*next), - llc_list); - if (end != next->llc_extent.e_start) { - errno = EINVAL; - return -1; - } - } - comp->llc_extent.e_start = start; comp->llc_extent.e_end = end; layout->llot_is_composite = true; @@ -1838,6 +1959,7 @@ int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id) int llapi_layout_comp_add(struct llapi_layout *layout) { struct llapi_layout_comp *last, *comp, *new; + bool composite = layout->llot_is_composite; comp = __llapi_layout_cur_comp(layout); if (comp == NULL) @@ -1850,12 +1972,50 @@ int llapi_layout_comp_add(struct llapi_layout *layout) last = list_entry(layout->llot_comp_list.prev, typeof(*last), llc_list); - if (new->llc_extent.e_end <= last->llc_extent.e_end) { - __llapi_comp_free(new); - errno = EINVAL; + list_add_tail(&new->llc_list, &layout->llot_comp_list); + + /* We must mark the layout composite for the sanity check, but it may + * not stay that way if the check fails */ + layout->llot_is_composite = true; + layout->llot_cur_comp = new; + + /* We need to set a temporary non-zero value for "end" when we call + * comp_extent_set, so we use LUSTRE_EOF-1, which is > all allowed + * for the end of the previous component. (If we're adding this + * component, the end of the previous component cannot be EOF.) */ + if (llapi_layout_comp_extent_set(layout, last->llc_extent.e_end, + LUSTRE_EOF - 1)) { + llapi_layout_comp_del(layout); + layout->llot_is_composite = composite; return -1; } - new->llc_extent.e_start = last->llc_extent.e_end; + + return 0; +} +/** + * Adds a first component of a mirror to \a layout. 
+ * The \a layout will change its current component pointer to + * the newly added component, and it'll be turned into a composite + * layout if it was not before the adding. + * + * \param[in] layout existing composite or plain layout + * + * \retval 0 on success + * \retval <0 if error occurs + */ +int llapi_layout_add_first_comp(struct llapi_layout *layout) +{ + struct llapi_layout_comp *comp, *new; + + comp = __llapi_layout_cur_comp(layout); + if (comp == NULL) + return -1; + + new = __llapi_comp_alloc(0); + if (new == NULL) + return -1; + + new->llc_extent.e_start = 0; list_add_tail(&new->llc_list, &layout->llot_comp_list); layout->llot_cur_comp = new; @@ -1893,14 +2053,11 @@ int llapi_layout_comp_del(struct llapi_layout *layout) errno = EINVAL; return -1; } - /* It can't be the only one on the list */ - if (comp->llc_list.prev == &layout->llot_comp_list) { - errno = EINVAL; - return -1; - } - layout->llot_cur_comp = list_entry(comp->llc_list.prev, typeof(*comp), llc_list); + if (comp->llc_list.prev == &layout->llot_comp_list) + layout->llot_cur_comp = NULL; + list_del_init(&comp->llc_list); __llapi_comp_free(comp); @@ -2018,8 +2175,9 @@ int llapi_layout_comp_use(struct llapi_layout *layout, int llapi_layout_file_comp_add(const char *path, const struct llapi_layout *layout) { - int rc, fd, lum_size, tmp_errno = 0; - struct lov_user_md *lum; + int rc, fd = -1, lum_size, tmp_errno = 0; + struct llapi_layout *existing_layout = NULL; + struct lov_user_md *lum = NULL; if (path == NULL || layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) { @@ -2027,34 +2185,60 @@ int llapi_layout_file_comp_add(const char *path, return -1; } - lum = llapi_layout_to_lum(layout); - if (lum == NULL) - return -1; + fd = open(path, O_RDWR); + if (fd < 0) { + tmp_errno = errno; + rc = -1; + goto out; + } - if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) { - free(lum); - errno = EINVAL; - return -1; + existing_layout = llapi_layout_get_by_fd(fd, 0); + if (existing_layout == NULL) { 
+ tmp_errno = errno; + rc = -1; + goto out; } - lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size; - fd = open(path, O_RDWR); - if (fd < 0) { + rc = llapi_layout_merge(&existing_layout, layout); + if (rc) { + tmp_errno = errno; + rc = -1; + goto out; + } + + rc = llapi_layout_sanity(existing_layout, path, false, false); + if (rc) { + tmp_errno = errno; + llapi_layout_sanity_perror(rc); + rc = -1; + goto out; + } + + lum = llapi_layout_to_lum(layout); + if (lum == NULL) { tmp_errno = errno; rc = -1; goto out; } + if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) { + tmp_errno = EINVAL; + rc = -1; + goto out; + } + lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size; + rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0); if (rc < 0) { tmp_errno = errno; - close(fd); rc = -1; goto out; } - close(fd); out: + if (fd >= 0) + close(fd); free(lum); + llapi_layout_free(existing_layout); errno = tmp_errno; return rc; } @@ -2070,18 +2254,20 @@ out: */ int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags) { - int rc, fd, lum_size; + int rc = 0, fd = -1, lum_size, tmp_errno = 0; struct llapi_layout *layout; - struct llapi_layout_comp *comp; - struct lov_user_md *lum; + struct llapi_layout_comp *comp, *next; + struct llapi_layout *existing_layout = NULL; + struct lov_user_md *lum = NULL; if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) { errno = EINVAL; return -1; } - /* Can only specify ID or flags, not both. */ - if (id != 0 && flags != 0) { + /* Can only specify ID or flags, not both, not none. 
*/ + if ((id != LCME_ID_INVAL && flags != 0) || + (id == LCME_ID_INVAL && flags == 0)) { errno = EINVAL; return -1; } @@ -2093,8 +2279,9 @@ int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags) llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF); comp = __llapi_layout_cur_comp(layout); if (comp == NULL) { - llapi_layout_free(layout); - return -1; + tmp_errno = errno; + rc = -1; + goto out; } comp->llc_id = id; @@ -2102,38 +2289,155 @@ int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags) lum = llapi_layout_to_lum(layout); if (lum == NULL) { - llapi_layout_free(layout); - return -1; + tmp_errno = errno; + rc = -1; + goto out; } lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size; fd = open(path, O_RDWR); if (fd < 0) { + tmp_errno = errno; + rc = -1; + goto out; + } + + existing_layout = llapi_layout_get_by_fd(fd, 0); + if (existing_layout == NULL) { + tmp_errno = errno; + rc = -1; + goto out; + } + + comp = NULL; + next = NULL; + while (rc == 0 && existing_layout->llot_cur_comp != NULL) { + rc = llapi_layout_comp_use(existing_layout, comp ? 
+ LLAPI_LAYOUT_COMP_USE_PREV : + LLAPI_LAYOUT_COMP_USE_LAST); + if (rc != 0) + break; + + next = comp; + comp = __llapi_layout_cur_comp(existing_layout); + if (comp == NULL) { + rc = -1; + break; + } + + if (id != LCME_ID_INVAL && id != comp->llc_id) + continue; + else if ((flags & LCME_FL_NEG) && (flags & comp->llc_flags)) + continue; + else if (flags && !(flags & comp->llc_flags)) + continue; + + rc = llapi_layout_comp_del(existing_layout); + /* the layout position is moved to previous one, adjust */ + comp = next; + } + if (rc < 0) { + tmp_errno = errno; + goto out; + } + + rc = llapi_layout_sanity(existing_layout, path, false, false); + if (rc) { + tmp_errno = errno; + llapi_layout_sanity_perror(rc); rc = -1; goto out; } rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0); if (rc < 0) { - int tmp_errno = errno; - close(fd); - errno = tmp_errno; + tmp_errno = errno; rc = -1; goto out; } - close(fd); + out: + if (fd >= 0) + close(fd); free(lum); llapi_layout_free(layout); + llapi_layout_free(existing_layout); + errno = tmp_errno; + return rc; } +/* Internal utility function to apply flags for sanity checking */ +static void llapi_layout_comp_apply_flags(struct llapi_layout_comp *comp, + uint32_t flags) +{ + if (flags & LCME_FL_NEG) + comp->llc_flags &= ~flags; + else + comp->llc_flags |= flags; +} + +struct llapi_layout_apply_flags_args { + uint32_t *lfa_ids; + uint32_t *lfa_flags; + int lfa_count; + int lfa_rc; +}; + + +static int llapi_layout_apply_flags_cb(struct llapi_layout *layout, + void *arg) +{ + struct llapi_layout_apply_flags_args *args = arg; + struct llapi_layout_comp *comp; + int i = 0; + + comp = __llapi_layout_cur_comp(layout); + if (comp == NULL) { + args->lfa_rc = -1; + return LLAPI_LAYOUT_ITER_STOP; + } + + for (i = 0; i < args->lfa_count; i++) { + if (comp->llc_id == args->lfa_ids[i]) + llapi_layout_comp_apply_flags(comp, args->lfa_flags[i]); + } + + return LLAPI_LAYOUT_ITER_CONT; +} + +/* Apply flags to the layout for sanity checking 
*/ +static int llapi_layout_apply_flags(struct llapi_layout *layout, uint32_t *ids, + uint32_t *flags, int count) +{ + struct llapi_layout_apply_flags_args args; + int rc = 0; + + if (!ids || !flags || count == 0) { + errno = EINVAL; + return -1; + } + + args.lfa_ids = ids; + args.lfa_flags = flags; + args.lfa_count = count; + args.lfa_rc = 0; + + rc = llapi_layout_comp_iterate(layout, + llapi_layout_apply_flags_cb, + &args); + if (errno == ENOENT) + errno = 0; + + if (rc != LLAPI_LAYOUT_ITER_CONT) + rc = args.lfa_rc; + + return rc; +} /** - * Change flags or other parameters of the component(s) by component ID of an - * existing file. The component to be modified is specified by the - * comp->lcme_id value, which must be an unique component ID. The new - * attributes are passed in by @comp and @valid is used to specify which - * attributes in the component are going to be changed. + * Change flags by component ID of components of an existing file. + * The component to be modified is specified by the comp->lcme_id value, + * which must be a unique component ID. 
* * \param[in] path path name of the file * \param[in] ids An array of component IDs @@ -2144,9 +2448,10 @@ out: int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags, size_t count) { - int rc = -1, fd = -1, i; + int rc = -1, fd = -1, i, tmp_errno = 0; size_t lum_size; - struct llapi_layout *layout; + struct llapi_layout *existing_layout = NULL; + struct llapi_layout *layout = NULL; struct llapi_layout_comp *comp; struct lov_user_md *lum = NULL; @@ -2176,15 +2481,49 @@ int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags, } } + fd = open(path, O_RDWR); + if (fd < 0) { + tmp_errno = errno; + rc = -1; + goto out; + } + + existing_layout = llapi_layout_get_by_fd(fd, 0); + if (existing_layout == NULL) { + tmp_errno = errno; + rc = -1; + goto out; + } + + if (llapi_layout_apply_flags(existing_layout, ids, flags, count)) { + tmp_errno = errno; + rc = -1; + goto out; + } + + rc = llapi_layout_sanity(existing_layout, path, false, false); + if (rc) { + tmp_errno = errno; + llapi_layout_sanity_perror(rc); + rc = -1; + goto out; + } + layout = __llapi_layout_alloc(); - if (layout == NULL) - return -1; + if (layout == NULL) { + tmp_errno = errno; + rc = -1; + goto out; + } layout->llot_is_composite = true; for (i = 0; i < count; i++) { comp = __llapi_comp_alloc(0); - if (comp == NULL) + if (comp == NULL) { + tmp_errno = errno; + rc = -1; goto out; + } comp->llc_id = ids[i]; comp->llc_flags = flags[i]; @@ -2194,39 +2533,38 @@ int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags, } lum = llapi_layout_to_lum(layout); - if (lum == NULL) + if (lum == NULL) { + tmp_errno = errno; + rc = -1; goto out; + } lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size; - fd = open(path, O_RDWR); - if (fd < 0) - goto out; - /* flush cached pages from clients */ rc = llapi_file_flush(fd); if (rc) { - errno = -rc; + tmp_errno = -rc; rc = -1; - goto out_close; + goto out; } rc = fsetxattr(fd, 
XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0); - if (rc < 0) - goto out_close; + if (rc < 0) { + tmp_errno = errno; + goto out; + } rc = 0; -out_close: - if (fd >= 0) { - int tmp_errno = errno; - close(fd); - errno = tmp_errno; - } out: - if (lum) - free(lum); + if (fd >= 0) + close(fd); + + free(lum); + llapi_layout_free(existing_layout); llapi_layout_free(layout); + errno = tmp_errno; return rc; } @@ -2360,6 +2698,71 @@ error: } /** + * Get the last initialized component + * + * Scans the component list from the tail towards the head and, unless it + * returns early, leaves layout->llot_cur_comp on the component where the + * scan stopped. + * + * \param[in] layout component layout list. + * + * \retval 0 found, or \a layout is not composite + * \retval -EINVAL not found + * \retval -EISDIR directory layout + */ +int llapi_layout_get_last_init_comp(struct llapi_layout *layout) +{ + struct llapi_layout_comp *comp = NULL, *head = NULL; + + if (!layout->llot_is_composite) + return 0; + + head = list_entry(layout->llot_comp_list.next, typeof(*comp), llc_list); + if (head == NULL) + return -EINVAL; + if (head->llc_id == 0 && !(head->llc_flags & LCME_FL_INIT)) + /* a directory */ + return -EISDIR; + + /* traverse the components from the tail to find the last init one */ + comp = list_entry(layout->llot_comp_list.prev, typeof(*comp), llc_list); + while (comp != head) { + if (comp->llc_flags & LCME_FL_INIT) + break; + comp = list_entry(comp->llc_list.prev, typeof(*comp), llc_list); + } + + layout->llot_cur_comp = comp; + + return comp->llc_flags & LCME_FL_INIT ? 0 : -EINVAL; +} + +/** + * Inherit stripe info from the file's component to the mirror + * + * \param[in] f_layout file component layout list. + * \param[in] m_layout mirror component layout list. 
+ * + * \retval 0 on success + * \retval -EINVAL on error + */ +int llapi_layout_mirror_inherit(struct llapi_layout *f_layout, + struct llapi_layout *m_layout) +{ + struct llapi_layout_comp *m_comp = NULL; + struct llapi_layout_comp *f_comp = NULL; + int rc = 0; + + f_comp = __llapi_layout_cur_comp(f_layout); + if (f_comp == NULL) + return -EINVAL; + m_comp = __llapi_layout_cur_comp(m_layout); + if (m_comp == NULL) + return -EINVAL; + + m_comp->llc_stripe_size = f_comp->llc_stripe_size; + m_comp->llc_stripe_count = f_comp->llc_stripe_count; + + return rc; +} + +/** * Find all stale components. * * \param[in] layout component layout list. @@ -2413,6 +2816,10 @@ int llapi_mirror_find_stale(struct llapi_layout *layout, /* not in the specified mirror */ if (j == ids_nr) goto next; + } else if (flags & LCME_FL_NOSYNC) { + /* if not specified mirrors, do not resync "nosync" + * mirrors */ + goto next; } rc = llapi_layout_comp_id_get(layout, &id); @@ -2447,9 +2854,9 @@ error: } /* locate @layout to a valid component covering file [file_start, file_end) */ -static uint32_t llapi_mirror_find(struct llapi_layout *layout, - uint64_t file_start, uint64_t file_end, - uint64_t *endp) +uint32_t llapi_mirror_find(struct llapi_layout *layout, + uint64_t file_start, uint64_t file_end, + uint64_t *endp) { uint32_t mirror_id = 0; int rc; @@ -2498,48 +2905,728 @@ static uint32_t llapi_mirror_find(struct llapi_layout *layout, if (rc < 0) return rc; } + if (!mirror_id) + return -ENOENT; return mirror_id; } -ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout, - uint32_t dst, uint64_t start, uint64_t end) +int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, + struct llapi_resync_comp *comp_array, + int comp_size, uint64_t start, uint64_t end) { - uint64_t mirror_end = 0; - ssize_t result = 0; - size_t count; + size_t page_size = sysconf(_SC_PAGESIZE); + const size_t buflen = 4 << 20; /* 4M */ + void *buf; + uint64_t pos = start; + uint64_t data_off = pos, 
data_end = pos; + uint32_t src = 0; + uint64_t mirror_end = 0; /* lives across loop iterations; set by llapi_mirror_find() */ + int i; + int rc; + int rc2 = 0; - if (end == OBD_OBJECT_EOF) - count = OBD_OBJECT_EOF; - else - count = end - start; + rc = posix_memalign(&buf, page_size, buflen); + if (rc) + return -rc; + + while (pos < end) { + ssize_t bytes_read; + size_t to_read; + size_t to_write; + + if (pos >= data_end) { + off_t tmp_off; + size_t data_size; + + if (pos >= mirror_end || !src) { + rc = llapi_mirror_find(layout, pos, end, + &mirror_end); + if (rc < 0) + break; /* common exit frees buf and resets lrc_synced */ + src = rc; + /* restrict mirror end by resync end */ + mirror_end = MIN(end, mirror_end); + } - while (count > 0) { - uint32_t src; - size_t to_copy; - ssize_t copied; + tmp_off = llapi_mirror_data_seek(fd, src, pos, + &data_size); + if (tmp_off < 0) { + /* switch to full copy */ + to_read = mirror_end - pos; + goto do_read; + } + data_off = tmp_off; + data_end = data_off + data_size; - src = llapi_mirror_find(layout, start, end, &mirror_end); - if (src == 0) - return -ENOENT; + data_off = MIN(data_off, mirror_end); + data_end = MIN(data_end, mirror_end); - if (mirror_end == OBD_OBJECT_EOF) - to_copy = count; - else - to_copy = MIN(count, mirror_end - start); + /* align by page, if there is data block to copy */ + if (data_size) + data_off &= ~(page_size - 1); + } + + if (pos < data_off) { + for (i = 0; i < comp_size; i++) { + uint64_t cur_pos; + size_t to_punch; + uint32_t mid = comp_array[i].lrc_mirror_id; + + /* skip non-overlapped component */ + if (pos >= comp_array[i].lrc_end || + data_off <= comp_array[i].lrc_start) + continue; + + if (pos < comp_array[i].lrc_start) + cur_pos = comp_array[i].lrc_start; + else + cur_pos = pos; + + if (data_off > comp_array[i].lrc_end) + to_punch = comp_array[i].lrc_end - + cur_pos; + else + to_punch = data_off - cur_pos; + + if (comp_array[i].lrc_end == OBD_OBJECT_EOF) { + /* the last component can be truncated + * safely + */ + rc = llapi_mirror_truncate(fd, mid, + cur_pos); + /* hole at the end of file, so just + 
* truncate up to set size. + */ + if (!rc && data_off == data_end) + rc = llapi_mirror_truncate(fd, + mid, data_end); + } else { + rc = llapi_mirror_punch(fd, + comp_array[i].lrc_mirror_id, + cur_pos, to_punch); + } + /* if failed then read failed hole range */ + if (rc < 0) { + rc = 0; + pos = cur_pos; + if (pos + to_punch == data_off) + to_read = data_end - pos; + else + to_read = to_punch; + goto do_read; + } + } + pos = data_off; + } + if (pos == mirror_end) + continue; + to_read = data_end - pos; +do_read: + if (!to_read) + break; - copied = llapi_mirror_copy(fd, src, dst, start, to_copy); - if (copied < 0) - return copied; + assert(data_end <= mirror_end); - result += copied; - if (copied < to_copy) /* end of file */ + to_read = MIN(buflen, to_read); + to_read = ((to_read - 1) | (page_size - 1)) + 1; + bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos); + if (bytes_read == 0) { + /* end of file */ + break; + } + if (bytes_read < 0) { + rc = bytes_read; break; + } + + /* round up to page align to make direct IO happy. */ + to_write = ((bytes_read - 1) | (page_size - 1)) + 1; + + for (i = 0; i < comp_size; i++) { + ssize_t written; + off_t pos2 = pos; + size_t to_write2 = to_write; + + /* skip non-overlapped component */ + if (pos >= comp_array[i].lrc_end || + pos + to_write <= comp_array[i].lrc_start) + continue; + + if (pos < comp_array[i].lrc_start) + pos2 = comp_array[i].lrc_start; + + to_write2 -= pos2 - pos; + + if ((pos + to_write) > comp_array[i].lrc_end) + to_write2 -= pos + to_write - + comp_array[i].lrc_end; + + written = llapi_mirror_write(fd, + comp_array[i].lrc_mirror_id, + buf + pos2 - pos, + to_write2, pos2); + if (written < 0) { + /** + * this component is not written successfully, + * mark it using its lrc_synced, it is supposed + * to be false before getting here. + * + * And before this function returns, all + * elements of comp_array will reverse their + * lrc_synced flag to reflect their true + * meanings. 
+ */ + comp_array[i].lrc_synced = true; + llapi_error(LLAPI_MSG_ERROR, written, + "component %u not synced", + comp_array[i].lrc_id); + if (rc2 == 0) + rc2 = (int)written; + continue; + } + assert(written == to_write2); + } + pos += bytes_read; + } + + free(buf); + + if (rc < 0) { + /* fatal error happens */ + for (i = 0; i < comp_size; i++) + comp_array[i].lrc_synced = false; + return rc; + } + + /** + * no fatal error happens, each lrc_synced tells whether the component + * has been resync successfully (note: we'd reverse the value to + * reflect its true meaning. + */ + for (i = 0; i < comp_size; i++) { + comp_array[i].lrc_synced = !comp_array[i].lrc_synced; + if (comp_array[i].lrc_synced && pos & (page_size - 1)) { + rc = llapi_mirror_truncate(fd, + comp_array[i].lrc_mirror_id, pos); + if (rc < 0) + comp_array[i].lrc_synced = false; + } + } + + /** + * returns the first error code for partially successful resync if + * possible. + */ + return rc2; +} + +enum llapi_layout_comp_sanity_error { + LSE_OK, + LSE_INCOMPLETE_MIRROR, + LSE_ADJACENT_EXTENSION, + LSE_INIT_EXTENSION, + LSE_FLAGS, + LSE_DOM_EXTENSION, + LSE_DOM_EXTENSION_FOLLOWING, + LSE_DOM_FIRST, + LSE_SET_COMP_START, + LSE_NOT_ZERO_LENGTH_EXTENDABLE, + LSE_END_NOT_GREATER, + LSE_ZERO_LENGTH_NORMAL, + LSE_NOT_ADJACENT_PREV, + LSE_START_GT_END, + LSE_ALIGN_END, + LSE_ALIGN_EXT, + LSE_UNKNOWN_OST, + LSE_LAST, +}; + +const char *const llapi_layout_strerror[] = +{ + [LSE_OK] = "", + [LSE_INCOMPLETE_MIRROR] = + "Incomplete mirror - must go to EOF", + [LSE_ADJACENT_EXTENSION] = + "No adjacent extension space components", + [LSE_INIT_EXTENSION] = + "Cannot apply extension flag to init components", + [LSE_FLAGS] = + "Wrong flags", + [LSE_DOM_EXTENSION] = + "DoM components can't be extension space", + [LSE_DOM_EXTENSION_FOLLOWING] = + "DoM components cannot be followed by extension space", + [LSE_DOM_FIRST] = + "DoM component should be the first one in a file/mirror", + [LSE_SET_COMP_START] = + "Must set previous 
component extent before adding next", + [LSE_NOT_ZERO_LENGTH_EXTENDABLE] = + "Extendable component must start out zero-length", + [LSE_END_NOT_GREATER] = + "Component end is before end of previous component", + [LSE_ZERO_LENGTH_NORMAL] = + "Zero length components must be followed by extension", + [LSE_NOT_ADJACENT_PREV] = + "Components not adjacent (end != next->start", + [LSE_START_GT_END] = + "Component start is > end", + [LSE_ALIGN_END] = + "The component end must be aligned by the stripe size", + [LSE_ALIGN_EXT] = + "The extension size must be aligned by the stripe size", + [LSE_UNKNOWN_OST] = + "An unknown OST idx is specified", +}; + +struct llapi_layout_sanity_args { + char lsa_fsname[MAX_OBD_NAME + 1]; + bool lsa_incomplete; + bool lsa_flr; + bool lsa_ondisk; + int lsa_rc; +}; + +/* The component flags can be set by users at creation/modification time. */ +#define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ + LCME_FL_EXTENSION) + +/** + * When modified, adjust llapi_stripe_param_verify() if needed as well. + */ +static int llapi_layout_sanity_cb(struct llapi_layout *layout, + void *arg) +{ + struct llapi_layout_comp *comp, *next, *prev; + struct llapi_layout_sanity_args *args = arg; + bool first_comp = false; + + comp = __llapi_layout_cur_comp(layout); + if (comp == NULL) { + args->lsa_rc = -1; + goto out_err; + } + + if (comp->llc_list.prev != &layout->llot_comp_list) + prev = list_entry(comp->llc_list.prev, typeof(*prev), + llc_list); + else + prev = NULL; + + if (comp->llc_list.next != &layout->llot_comp_list) + next = list_entry(comp->llc_list.next, typeof(*next), + llc_list); + else + next = NULL; + + /* Start of zero implies a new mirror */ + if (comp->llc_extent.e_start == 0) { + first_comp = true; + /* Most checks apply only within one mirror, this is an + * exception. 
*/ + if (prev && prev->llc_extent.e_end != LUSTRE_EOF) { + args->lsa_rc = LSE_INCOMPLETE_MIRROR; + goto out_err; + } + + prev = NULL; + } + + if (next && next->llc_extent.e_start == 0) + next = NULL; + + /* Flag sanity checks */ + /* No adjacent extension components */ + if ((comp->llc_flags & LCME_FL_EXTENSION) && next && + (next->llc_flags & LCME_FL_EXTENSION)) { + args->lsa_rc = LSE_ADJACENT_EXTENSION; + goto out_err; + } + + /* Extension flag cannot be applied to init components and the first + * component of each mirror is automatically init */ + if ((comp->llc_flags & LCME_FL_EXTENSION) && + (comp->llc_flags & LCME_FL_INIT || first_comp)) { + args->lsa_rc = LSE_INIT_EXTENSION; + goto out_err; + } + + if (comp->llc_ondisk) { + if (comp->llc_flags & LCME_FL_NEG) + args->lsa_rc = LSE_FLAGS; + } else if (!args->lsa_incomplete) { + if (args->lsa_flr) { + if (comp->llc_flags & ~LCME_USER_COMP_FLAGS) + args->lsa_rc = LSE_FLAGS; + } else { + if (comp->llc_flags & + ~(LCME_FL_EXTENSION | LCME_FL_PREF_RW)) + args->lsa_rc = LSE_FLAGS; + } + } + if (args->lsa_rc) + goto out_err; + + /* DoM sanity checks */ + if (comp->llc_pattern == LLAPI_LAYOUT_MDT || + comp->llc_pattern == LOV_PATTERN_MDT) { + /* DoM components can't be extension components */ + if (comp->llc_flags & LCME_FL_EXTENSION) { + args->lsa_rc = LSE_DOM_EXTENSION; + goto out_err; + } + /* DoM components cannot be followed by an extension comp */ + if (next && (next->llc_flags & LCME_FL_EXTENSION)) { + args->lsa_rc = LSE_DOM_EXTENSION_FOLLOWING; + goto out_err; + } + + /* DoM should be the first component in a mirror */ + if (!first_comp) { + args->lsa_rc = LSE_DOM_FIRST; + errno = EINVAL; + goto out_err; + } + } + + /* Extent sanity checks */ + /* Must set previous component extent before adding another */ + if (prev && prev->llc_extent.e_start == 0 && + prev->llc_extent.e_end == 0) { + args->lsa_rc = LSE_SET_COMP_START; + goto out_err; + } + + if (!args->lsa_incomplete) { + /* Components followed by extension 
space (extendable + * components) must be zero length before initialization. + * (Except for first comp, which will be initialized on + * creation). */ + if (next && (next->llc_flags & LCME_FL_EXTENSION) && + !first_comp && !(comp->llc_flags & LCME_FL_INIT) && + comp->llc_extent.e_start != comp->llc_extent.e_end) { + args->lsa_rc = LSE_NOT_ZERO_LENGTH_EXTENDABLE; + goto out_err; + } + + /* End must come after end of previous comp */ + if (prev && comp->llc_extent.e_end < prev->llc_extent.e_end) { + args->lsa_rc = LSE_END_NOT_GREATER; + goto out_err; + } + + /* Components not followed by ext space must have length > 0. */ + if (comp->llc_extent.e_start == comp->llc_extent.e_end && + (next == NULL || !(next->llc_flags & LCME_FL_EXTENSION))) { + args->lsa_rc = LSE_ZERO_LENGTH_NORMAL; + goto out_err; + } + + /* The component end must be aligned by the stripe size; prev is NULL for the first component in the list, so it is checked before use */ + if (prev && (comp->llc_flags & LCME_FL_EXTENSION) && + (prev->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) { + if (comp->llc_extent.e_end != LUSTRE_EOF && + comp->llc_extent.e_end % prev->llc_stripe_size) { + args->lsa_rc = LSE_ALIGN_END; + goto out_err; + } + if ((comp->llc_stripe_size * SEL_UNIT_SIZE) % + prev->llc_stripe_size) { + args->lsa_rc = LSE_ALIGN_EXT; + goto out_err; + } + } else if (!(comp->llc_flags & LCME_FL_EXTENSION) && + (comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) { + if (comp->llc_extent.e_end != LUSTRE_EOF && + comp->llc_extent.e_end != + comp->llc_extent.e_start && + comp->llc_extent.e_end % comp->llc_stripe_size) { + args->lsa_rc = LSE_ALIGN_END; + goto out_err; + } + } + } + + /* Components must have start == prev->end */ + if (prev && comp->llc_extent.e_start != 0 && + comp->llc_extent.e_start != prev->llc_extent.e_end) { + args->lsa_rc = LSE_NOT_ADJACENT_PREV; + goto out_err; + } + + /* Components must have start <= end */ + if (comp->llc_extent.e_start > comp->llc_extent.e_end) { + args->lsa_rc = LSE_START_GT_END; + goto out_err; + } + + if (args->lsa_fsname[0] != '\0') { + int i, rc = 
0; + + if (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC) { + assert(comp->llc_stripe_count <= + comp->llc_objects_count); + + for (i = 0; i < comp->llc_stripe_count && rc == 0; i++){ + if (comp->llc_objects[i].l_ost_idx == + LLAPI_LAYOUT_IDX_MAX) { + args->lsa_rc = -1; + goto out_err; + } + rc = llapi_layout_search_ost( + comp->llc_objects[i].l_ost_idx, + comp->llc_pool_name, args->lsa_fsname); + } + } else if (comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT) { + rc = llapi_layout_search_ost( + comp->llc_stripe_offset, + comp->llc_pool_name, args->lsa_fsname); + } + if (rc) { + args->lsa_rc = LSE_UNKNOWN_OST; + goto out_err; + } + } + + return LLAPI_LAYOUT_ITER_CONT; + +out_err: + errno = errno ? errno : EINVAL; + return LLAPI_LAYOUT_ITER_STOP; +} + +/* Print explanation of layout error */ +void llapi_layout_sanity_perror(int error) +{ + if (error >= LSE_LAST || error < 0) { + fprintf(stdout, "Invalid layout, unrecognized error: %d\n", + error); + } else { + fprintf(stdout, "Invalid layout: %s\n", + llapi_layout_strerror[error]); + } +} + +/* Walk a layout and enforce sanity checks that apply to > 1 component + * + * The core idea here is that of sanity checking individual tokens vs semantic + * checking. + * We cannot check everything at the individual component level ('token'), + * instead we must check whether or not the full layout has a valid meaning. + * + * An example of a component level check is "is stripe size valid?". That is + * handled when setting stripe size. + * + * An example of a layout level check is "are the extents of these components + * valid when adjacent to one another", or "can we set these flags on adjacent + * components"? + * + * \param[in] layout component layout list. + * \param[in] fname file the layout to be checked for + * \param[in] incomplete if layout is complete or not - some checks can + * only be done on complete layouts. 
+ * \param[in] flr set when this is called from FLR mirror create + * + * \retval 0, success, positive: various errors, see + * llapi_layout_sanity_perror, -1, failure + */ +int llapi_layout_sanity(struct llapi_layout *layout, + const char *fname, + bool incomplete, + bool flr) +{ + struct llapi_layout_sanity_args args = { { 0 } }; + struct llapi_layout_comp *curr; + int rc = 0; + + if (!layout) + return 0; + + curr = layout->llot_cur_comp; + if (!curr) + return 0; + + /* Make sure we are on a Lustre file system */ + if (fname) { + rc = llapi_search_fsname(fname, args.lsa_fsname); + if (rc) { + llapi_error(LLAPI_MSG_ERROR, rc, + "'%s' is not on a Lustre filesystem", + fname); + return rc; + } + } + + /* Set up args */ + args.lsa_rc = 0; + args.lsa_flr = flr; + args.lsa_incomplete = incomplete; + + /* When we modify an existing layout, this tells us if it's FLR */ + if (mirror_id_of(curr->llc_id) > 0) + args.lsa_flr = true; + + errno = 0; + rc = llapi_layout_comp_iterate(layout, + llapi_layout_sanity_cb, + &args); + if (errno == ENOENT) + errno = 0; + + if (rc != LLAPI_LAYOUT_ITER_CONT) + rc = args.lsa_rc; + + layout->llot_cur_comp = curr; + + return rc; +} + +int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size) +{ + uint64_t pattern, start; + int rc; + + if (!layout || !llapi_layout_is_composite(layout)) { + *size = 0; + return 0; + } + + rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST); + if (rc) + return -errno; + + rc = llapi_layout_pattern_get(layout, &pattern); + if (rc) + return -errno; + + if (pattern != LOV_PATTERN_MDT && pattern != LLAPI_LAYOUT_MDT) { + *size = 0; + return 0; + } + + rc = llapi_layout_comp_extent_get(layout, &start, size); + if (rc) + return -errno; + if (start) + return -ERANGE; + return 0; +} + +int lov_comp_md_size(struct lov_comp_md_v1 *lcm) +{ + if (lcm->lcm_magic == LOV_MAGIC_V1 || lcm->lcm_magic == LOV_MAGIC_V3) { + struct lov_user_md *lum = (void *)lcm; + + return 
lov_user_md_size(lum->lmm_stripe_count, lum->lmm_magic); + } + + if (lcm->lcm_magic == LOV_MAGIC_FOREIGN) { + struct lov_foreign_md *lfm = (void *)lcm; + + return lfm->lfm_length; + } + + if (lcm->lcm_magic != LOV_MAGIC_COMP_V1) + return -EOPNOTSUPP; + + return lcm->lcm_size; +} + +int llapi_get_lum_file_fd(int dir_fd, const char *fname, __u64 *valid, + lstatx_t *statx, struct lov_user_md *lum, + size_t lumsize) +{ + struct lov_user_mds_data *lmd; + char buf[65536 + offsetof(typeof(*lmd), lmd_lmm)]; + int parent_fd = -1; + int rc; - if (count != OBD_OBJECT_EOF) - count -= copied; - start += copied; + if (lum && lumsize < sizeof(*lum)) + return -EINVAL; + + /* If a file name is provided, it is relative to the parent directory */ + if (fname) { + parent_fd = dir_fd; + dir_fd = -1; } - return result; + lmd = (struct lov_user_mds_data *)buf; + rc = get_lmd_info_fd(fname, parent_fd, dir_fd, buf, sizeof(buf), + GET_LMD_INFO); + if (rc) + return rc; + + if (valid) + *valid = lmd->lmd_flags; + + if (statx) + memcpy(statx, &lmd->lmd_stx, sizeof(*statx)); + + if (lum) { + if (lmd->lmd_lmmsize > lumsize) + return -EOVERFLOW; + memcpy(lum, &lmd->lmd_lmm, lmd->lmd_lmmsize); + } + + return 0; +} + +int llapi_get_lum_dir_fd(int dir_fd, __u64 *valid, lstatx_t *statx, + struct lov_user_md *lum, size_t lumsize) +{ + return llapi_get_lum_file_fd(dir_fd, NULL, valid, statx, lum, lumsize); +} + +/** + * Split \a path into its parent directory and final name, open the parent + * and query the final name relative to it. + * + * \retval 0 on success + * \retval negative errno on failure; -ENAMETOOLONG when the parent part of + * \a path does not fit in PATH_MAX + */ +int llapi_get_lum_file(const char *path, __u64 *valid, lstatx_t *statx, + struct lov_user_md *lum, size_t lumsize) +{ + char parent[PATH_MAX]; + const char *fname; + char *tmp; + int dir_fd; + int rc; + + tmp = strrchr(path, '/'); + if (!tmp) { + /* bare name: resolve it relative to the current directory */ + strncpy(parent, ".", sizeof(parent) - 1); + fname = path; + } else { + /* "/name" lives in the root directory: keep the '/' as parent */ + size_t plen = tmp == path ? 1 : (size_t)(tmp - path); + + /* refuse a parent that would overflow the stack buffer */ + if (plen >= sizeof(parent)) + return -ENAMETOOLONG; + memcpy(parent, path, plen); + parent[plen] = 0; + fname = tmp + 1; + } + + dir_fd = open(parent, O_RDONLY); + if (dir_fd < 0) { + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, "cannot 
open '%s'", path); + return rc; + } + + rc = llapi_get_lum_file_fd(dir_fd, fname, valid, statx, lum, lumsize); + close(dir_fd); + return rc; +} + +int llapi_get_lum_dir(const char *path, __u64 *valid, lstatx_t *statx, + struct lov_user_md *lum, size_t lumsize) +{ + int dir_fd; + int rc; + + dir_fd = open(path, O_RDONLY); + if (dir_fd < 0) { + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path); + return rc; + } + + rc = llapi_get_lum_dir_fd(dir_fd, valid, statx, lum, lumsize); + close(dir_fd); + return rc; }