+
+/**
+ * Find all stale components.
+ *
+ * Walks @layout from its first component and records every component that
+ * has LCME_FL_STALE set into @comp.
+ *
+ * \param[in] layout		component layout list.
+ * \param[out] comp		array of stale component info.
+ * \param[in] comp_size		array size of @comp.
+ * \param[in] mirror_ids	array of mirror ids; only components belonging
+ *				to these mirrors will be collected.
+ * \param[in] ids_nr		number of entries in @mirror_ids.  When it is
+ *				not positive, stale components of every mirror
+ *				are collected, except those that also carry
+ *				LCME_FL_NOSYNC.
+ *
+ * \retval	number of component info collected on success, or a negative
+ *		value on failure (-EINVAL when @comp cannot hold every
+ *		matching component or when advancing to the next component
+ *		fails).
+ */
+int llapi_mirror_find_stale(struct llapi_layout *layout,
+			    struct llapi_resync_comp *comp, size_t comp_size,
+			    __u16 *mirror_ids, int ids_nr)
+{
+	int idx = 0;
+	int rc;
+
+	rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
+	if (rc < 0)
+		goto error;
+
+	/* the loop ends once llapi_layout_comp_use() returns non-zero */
+	while (rc == 0) {
+		uint32_t id;
+		uint32_t mirror_id;
+		uint32_t flags;
+		uint64_t start, end;
+
+		rc = llapi_layout_comp_flags_get(layout, &flags);
+		if (rc < 0)
+			goto error;
+
+		if (!(flags & LCME_FL_STALE))
+			goto next;
+
+		rc = llapi_layout_mirror_id_get(layout, &mirror_id);
+		if (rc < 0)
+			goto error;
+
+		/* the caller only wants stale components from specific
+		 * mirrors */
+		if (ids_nr > 0) {
+			int j;
+
+			for (j = 0; j < ids_nr; j++) {
+				if (mirror_ids[j] == mirror_id)
+					break;
+			}
+
+			/* not in the specified mirror */
+			if (j == ids_nr)
+				goto next;
+		} else if (flags & LCME_FL_NOSYNC) {
+			/* if not specified mirrors, do not resync "nosync"
+			 * mirrors */
+			goto next;
+		}
+
+		rc = llapi_layout_comp_id_get(layout, &id);
+		if (rc < 0)
+			goto error;
+
+		rc = llapi_layout_comp_extent_get(layout, &start, &end);
+		if (rc < 0)
+			goto error;
+
+		/* Check the capacity BEFORE storing: this keeps a zero-sized
+		 * @comp from being written to and lets the caller use every
+		 * slot of the array. */
+		if ((size_t)idx >= comp_size) {
+			rc = -EINVAL;
+			goto error;
+		}
+
+		/* pack this component into @comp array */
+		comp[idx].lrc_id = id;
+		comp[idx].lrc_mirror_id = mirror_id;
+		comp[idx].lrc_start = start;
+		comp[idx].lrc_end = end;
+		idx++;
+
+next:
+		rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
+		if (rc < 0) {
+			rc = -EINVAL;
+			goto error;
+		}
+	}
+error:
+	return rc < 0 ? rc : idx;
+}
+
+/**
+ * Locate @layout to a valid component covering file [file_start, file_end).
+ *
+ * Components flagged LCME_FL_STALE are skipped.  The first non-stale
+ * component containing @file_start selects the mirror; the search then keeps
+ * extending the covered range while later components belong to the same
+ * mirror and start exactly where the previous one ended.
+ *
+ * \param[in] layout		component layout list.
+ * \param[in] file_start	first byte that must be covered.
+ * \param[in] file_end		end of the wanted range (exclusive).
+ * \param[out] endp		end of the contiguous range found in the
+ *				selected mirror; 0 when nothing was found.
+ *
+ * \retval	id of the mirror covering @file_start, or 0 if no such mirror
+ *		exists or the layout could not be walked.  The return type is
+ *		unsigned, so failures are reported as 0 rather than as a
+ *		negative code that would read as a huge, bogus mirror id.
+ */
+uint32_t llapi_mirror_find(struct llapi_layout *layout,
+			   uint64_t file_start, uint64_t file_end,
+			   uint64_t *endp)
+{
+	uint32_t mirror_id = 0;
+	int rc;
+
+	*endp = 0;
+
+	rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
+	if (rc < 0)
+		return 0;
+
+	while (rc == 0) {
+		uint64_t start, end;
+		uint32_t flags, id, rid;
+
+		rc = llapi_layout_comp_flags_get(layout, &flags);
+		if (rc < 0)
+			return 0;
+
+		if (flags & LCME_FL_STALE)
+			goto next;
+
+		rc = llapi_layout_mirror_id_get(layout, &rid);
+		if (rc < 0)
+			return 0;
+
+		/* the id itself is not used, but a component whose id cannot
+		 * be read is still treated as a failure */
+		rc = llapi_layout_comp_id_get(layout, &id);
+		if (rc < 0)
+			return 0;
+
+		rc = llapi_layout_comp_extent_get(layout, &start, &end);
+		if (rc < 0)
+			return 0;
+
+		if (file_start >= start && file_start < end) {
+			if (!mirror_id)
+				mirror_id = rid;
+			else if (mirror_id != rid || *endp != start)
+				/* different mirror, or a hole in this one */
+				break;
+
+			/* continue the search from the end of this extent */
+			file_start = *endp = end;
+			if (end >= file_end)
+				break;
+		}
+
+next:
+		rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
+		if (rc < 0)
+			return 0;
+	}
+
+	return mirror_id;
+}
+
+/**
+ * Copy file data in [start, end) from an up-to-date mirror into every
+ * component listed in @comp_array.
+ *
+ * The data is read in chunks of at most 4MiB from the mirror returned by
+ * llapi_mirror_find() and written with llapi_mirror_write() to each
+ * component whose extent overlaps the chunk.
+ *
+ * \param[in] fd		file descriptor passed to the mirror I/O calls.
+ * \param[in] layout		layout used to look up the source mirror.
+ * \param[in,out] comp_array	components to resync; lrc_synced is expected
+ *				to be false on entry and tells on return
+ *				whether the component was resynced.
+ * \param[in] comp_size		number of entries in @comp_array.
+ * \param[in] start		first byte to copy.
+ * \param[in] end		end of the range, or OBD_OBJECT_EOF to copy
+ *				until the source mirror reports end of file.
+ *
+ * \retval	0 on success; the first write error if some components could
+ *		not be written; a negative value on fatal error (buffer
+ *		allocation, no source mirror, read failure), in which case
+ *		every lrc_synced is set to false.
+ */
+int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
+			     struct llapi_resync_comp *comp_array,
+			     int comp_size, uint64_t start, uint64_t end)
+{
+	uint64_t count;
+	size_t page_size = sysconf(_SC_PAGESIZE);
+	const size_t buflen = 4 << 20; /* 4M */
+	void *buf;
+	uint64_t pos = start;
+	int i;
+	int rc;
+	int rc2 = 0;
+
+	rc = posix_memalign(&buf, page_size, buflen);
+	if (rc)
+		return -rc;
+
+	if (end == OBD_OBJECT_EOF)
+		count = OBD_OBJECT_EOF;
+	else
+		count = end - start;
+
+	while (count > 0) {
+		uint32_t src;
+		uint64_t mirror_end = 0;
+		uint64_t bytes_left;
+		ssize_t bytes_read;
+		size_t to_read;
+		size_t to_write;
+
+		src = llapi_mirror_find(layout, pos, end, &mirror_end);
+		if (src == 0) {
+			/* leave through the common exit so that @buf is
+			 * released instead of leaked */
+			rc = -ENOENT;
+			break;
+		}
+
+		if (mirror_end == OBD_OBJECT_EOF) {
+			bytes_left = count;
+		} else {
+			bytes_left = MIN(count, mirror_end - pos);
+			/* round up to the next page boundary */
+			bytes_left = ((bytes_left - 1) | (page_size - 1)) + 1;
+		}
+		to_read = MIN(buflen, bytes_left);
+
+		bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
+		if (bytes_read == 0) {
+			/* end of file */
+			break;
+		}
+		if (bytes_read < 0) {
+			rc = bytes_read;
+			break;
+		}
+
+		/* round up to page align to make direct IO happy. */
+		to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
+
+		for (i = 0; i < comp_size; i++) {
+			ssize_t written;
+			off_t pos2 = pos;
+			size_t to_write2 = to_write;
+
+			/* skip non-overlapped component */
+			if (pos >= comp_array[i].lrc_end ||
+			    pos + to_write <= comp_array[i].lrc_start)
+				continue;
+
+			/* clip the chunk to the component's extent */
+			if (pos < comp_array[i].lrc_start)
+				pos2 = comp_array[i].lrc_start;
+
+			to_write2 -= pos2 - pos;
+
+			if ((pos + to_write) > comp_array[i].lrc_end)
+				to_write2 -= pos + to_write -
+					     comp_array[i].lrc_end;
+
+			written = llapi_mirror_write(fd,
+					comp_array[i].lrc_mirror_id,
+					(char *)buf + (pos2 - pos),
+					to_write2, pos2);
+			if (written < 0) {
+				/**
+				 * this component is not written successfully,
+				 * mark it using its lrc_synced, it is supposed
+				 * to be false before getting here.
+				 *
+				 * And before this function returns, all
+				 * elements of comp_array will reverse their
+				 * lrc_synced flag to reflect their true
+				 * meanings.
+				 */
+				comp_array[i].lrc_synced = true;
+				llapi_error(LLAPI_MSG_ERROR, written,
+					    "component %u not synced\n",
+					    comp_array[i].lrc_id);
+				if (rc2 == 0)
+					rc2 = (int)written;
+				continue;
+			}
+			assert(written == to_write2);
+		}
+
+		pos += bytes_read;
+		count -= bytes_read;
+	}
+
+	free(buf);
+
+	if (rc < 0) {
+		/* fatal error happens */
+		for (i = 0; i < comp_size; i++)
+			comp_array[i].lrc_synced = false;
+		return rc;
+	}
+
+	/**
+	 * no fatal error happens, each lrc_synced tells whether the component
+	 * has been resync successfully (note: we'd reverse the value to
+	 * reflect its true meaning).
+	 */
+	for (i = 0; i < comp_size; i++) {
+		comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
+		/* writes were rounded up to a page, so cut the mirror back
+		 * to the real end position when it is not page aligned */
+		if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
+			rc = llapi_mirror_truncate(fd,
+					comp_array[i].lrc_mirror_id, pos);
+			if (rc < 0)
+				comp_array[i].lrc_synced = false;
+		}
+	}
+
+	/**
+	 * returns the first error code for partially successful resync if
+	 * possible.
+	 */
+	return rc2;
+}
+
+/*
+ * Result codes stored in llapi_layout_sanity_args::lsa_rc by the layout
+ * sanity callback.  Each value is also the index of its message in
+ * llapi_layout_strerror[], so the order here must match that table.
+ */
+enum llapi_layout_comp_sanity_error {
+ LSE_OK, /* no problem found */
+ LSE_INCOMPLETE_MIRROR, /* previous mirror does not end at EOF */
+ LSE_ADJACENT_EXTENSION, /* two extension components in a row */
+ LSE_INIT_EXTENSION, /* extension flag on an init/first component */
+ LSE_FLAGS, /* component carries flags that are not allowed */
+ LSE_DOM_EXTENSION, /* DoM component flagged as extension */
+ LSE_DOM_EXTENSION_FOLLOWING, /* DoM component followed by extension */
+ LSE_DOM_FLR, /* DoM used together with FLR */
+ LSE_SET_COMP_START, /* previous component extent still unset */
+ LSE_NOT_ZERO_LENGTH_EXTENDABLE, /* extendable comp not zero-length */
+ LSE_END_NOT_GREATER, /* end is before end of previous component */
+ LSE_ZERO_LENGTH_NORMAL, /* zero-length comp without extension after */
+ LSE_NOT_ADJACENT_PREV, /* start differs from previous end */
+ LSE_START_GT_END, /* start is greater than end */
+ LSE_ALIGN_END, /* end not aligned to the stripe size */
+ LSE_ALIGN_EXT, /* extension size not aligned to the stripe size */
+ LSE_LAST, /* number of codes; not an error itself */
+};
+
+/*
+ * Human readable explanation for each llapi_layout_comp_sanity_error value,
+ * indexed by the error code.
+ */
+const char *llapi_layout_strerror[] =
+{
+	[LSE_OK] = "",
+	[LSE_INCOMPLETE_MIRROR] =
+		"Incomplete mirror - must go to EOF",
+	[LSE_ADJACENT_EXTENSION] =
+		"No adjacent extension space components",
+	[LSE_INIT_EXTENSION] =
+		"Cannot apply extension flag to init components",
+	[LSE_FLAGS] =
+		"Wrong flags",
+	[LSE_DOM_EXTENSION] =
+		"DoM components can't be extension space",
+	[LSE_DOM_EXTENSION_FOLLOWING] =
+		"DoM components cannot be followed by extension space",
+	[LSE_DOM_FLR] =
+		"FLR and DoM are not supported together",
+	[LSE_SET_COMP_START] =
+		"Must set previous component extent before adding next",
+	[LSE_NOT_ZERO_LENGTH_EXTENDABLE] =
+		"Extendable component must start out zero-length",
+	[LSE_END_NOT_GREATER] =
+		"Component end is before end of previous component",
+	[LSE_ZERO_LENGTH_NORMAL] =
+		"Zero length components must be followed by extension",
+	[LSE_NOT_ADJACENT_PREV] =
+		"Components not adjacent (end != next->start)",
+	[LSE_START_GT_END] =
+		"Component start is > end",
+	[LSE_ALIGN_END] =
+		"The component end must be aligned by the stripe size",
+	[LSE_ALIGN_EXT] =
+		"The extension size must be aligned by the stripe size",
+};
+
+/* State shared between llapi_layout_sanity() and its per-component callback */
+struct llapi_layout_sanity_args {
+ /* layout is still being built; checks that need a complete layout
+  * are skipped by the callback when this is set */
+ bool lsa_incomplete;
+ /* layout is treated as FLR (mirrored) */
+ bool lsa_flr;
+ /* NOTE(review): not read or written by the code visible here;
+  * the callback looks at the component's own llc_ondisk instead */
+ bool lsa_ondisk;
+ /* result of the check: 0 when fine, otherwise the code of the first
+  * failed check (or -1 when there is no current component) */
+ int lsa_rc;
+};
+
+/**
+ * Per-component callback of the layout sanity check.
+ *
+ * Validates the current component of @layout against its neighbours inside
+ * the same mirror (flags, DoM restrictions, extents and alignment).
+ *
+ * \param[in] layout	layout whose current component is checked.
+ * \param[in,out] arg	struct llapi_layout_sanity_args; lsa_rc receives the
+ *			code of the first failed check.
+ *
+ * \retval	LLAPI_LAYOUT_ITER_CONT when the component passes every check,
+ *		LLAPI_LAYOUT_ITER_STOP otherwise (errno is then set to EINVAL
+ *		unless it already holds another value).
+ */
+static int llapi_layout_sanity_cb(struct llapi_layout *layout,
+				  void *arg)
+{
+	struct llapi_layout_comp *comp, *next, *prev;
+	struct llapi_layout_sanity_args *args = arg;
+	bool first_comp = false;
+
+	comp = __llapi_layout_cur_comp(layout);
+	if (comp == NULL) {
+		args->lsa_rc = -1;
+		goto out_err;
+	}
+
+	/* neighbours in the list; NULL at either end of the list */
+	if (comp->llc_list.prev != &layout->llot_comp_list)
+		prev = list_entry(comp->llc_list.prev, typeof(*prev),
+				  llc_list);
+	else
+		prev = NULL;
+
+	if (comp->llc_list.next != &layout->llot_comp_list)
+		next = list_entry(comp->llc_list.next, typeof(*next),
+				  llc_list);
+	else
+		next = NULL;
+
+	/* Start of zero implies a new mirror */
+	if (comp->llc_extent.e_start == 0) {
+		first_comp = true;
+		/* Most checks apply only within one mirror, this is an
+		 * exception. */
+		if (prev && prev->llc_extent.e_end != LUSTRE_EOF) {
+			args->lsa_rc = LSE_INCOMPLETE_MIRROR;
+			goto out_err;
+		}
+
+		/* the previous component belongs to another mirror */
+		prev = NULL;
+	}
+
+	/* likewise, a following component starting at 0 is another mirror */
+	if (next && next->llc_extent.e_start == 0)
+		next = NULL;
+
+	/* Flag sanity checks */
+	/* No adjacent extension components */
+	if ((comp->llc_flags & LCME_FL_EXTENSION) && next &&
+	    (next->llc_flags & LCME_FL_EXTENSION)) {
+		args->lsa_rc = LSE_ADJACENT_EXTENSION;
+		goto out_err;
+	}
+
+	/* Extension flag cannot be applied to init components and the first
+	 * component of each mirror is automatically init */
+	if ((comp->llc_flags & LCME_FL_EXTENSION) &&
+	    (comp->llc_flags & LCME_FL_INIT || first_comp)) {
+		args->lsa_rc = LSE_INIT_EXTENSION;
+		goto out_err;
+	}
+
+	if (comp->llc_ondisk) {
+		if (comp->llc_flags & LCME_FL_NEG)
+			args->lsa_rc = LSE_FLAGS;
+	} else if (!args->lsa_incomplete) {
+		if (args->lsa_flr) {
+			if (comp->llc_flags & ~LCME_USER_COMP_FLAGS)
+				args->lsa_rc = LSE_FLAGS;
+		} else {
+			if (comp->llc_flags & ~LCME_FL_EXTENSION)
+				args->lsa_rc = LSE_FLAGS;
+		}
+	}
+	if (args->lsa_rc)
+		goto out_err;
+
+	/* DoM sanity checks */
+	if (comp->llc_pattern == LLAPI_LAYOUT_MDT ||
+	    comp->llc_pattern == LOV_PATTERN_MDT) {
+		/* DoM components can't be extension components */
+		if (comp->llc_flags & LCME_FL_EXTENSION) {
+			args->lsa_rc = LSE_DOM_EXTENSION;
+			goto out_err;
+		}
+		/* DoM components cannot be followed by an extension comp */
+		if (next && (next->llc_flags & LCME_FL_EXTENSION)) {
+			args->lsa_rc = LSE_DOM_EXTENSION_FOLLOWING;
+			goto out_err;
+		}
+
+		/* DoM and FLR are not supported together */
+		if (args->lsa_flr && first_comp) {
+			args->lsa_rc = LSE_DOM_FLR;
+			errno = ENOTSUP;
+			goto out_err;
+		}
+	}
+
+	/* Extent sanity checks */
+	/* Must set previous component extent before adding another */
+	if (prev && prev->llc_extent.e_start == 0 &&
+	    prev->llc_extent.e_end == 0) {
+		args->lsa_rc = LSE_SET_COMP_START;
+		goto out_err;
+	}
+
+	if (!args->lsa_incomplete) {
+		/* Components followed by extension space (extendable
+		 * components) must be zero length before initialization.
+		 * (Except for first comp, which will be initialized on
+		 * creation). */
+		if (next && (next->llc_flags & LCME_FL_EXTENSION) &&
+		    !first_comp && !(comp->llc_flags & LCME_FL_INIT) &&
+		    comp->llc_extent.e_start != comp->llc_extent.e_end) {
+			args->lsa_rc = LSE_NOT_ZERO_LENGTH_EXTENDABLE;
+			goto out_err;
+		}
+
+		/* End must come after end of previous comp */
+		if (prev && comp->llc_extent.e_end < prev->llc_extent.e_end) {
+			args->lsa_rc = LSE_END_NOT_GREATER;
+			goto out_err;
+		}
+
+		/* Components not followed by ext space must have length > 0. */
+		if (comp->llc_extent.e_start == comp->llc_extent.e_end &&
+		    (next == NULL || !(next->llc_flags & LCME_FL_EXTENSION))) {
+			args->lsa_rc = LSE_ZERO_LENGTH_NORMAL;
+			goto out_err;
+		}
+
+		/* The component end must be aligned by the stripe size.
+		 * An extension component is checked against the stripe size
+		 * of the component it extends; @prev is NULL when the
+		 * extension component has no predecessor in this mirror, in
+		 * which case there is nothing to compare against. */
+		if ((comp->llc_flags & LCME_FL_EXTENSION) && prev &&
+		    (prev->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
+			if (comp->llc_extent.e_end != LUSTRE_EOF &&
+			    comp->llc_extent.e_end % prev->llc_stripe_size) {
+				args->lsa_rc = LSE_ALIGN_END;
+				goto out_err;
+			}
+			if ((comp->llc_stripe_size * SEL_UNIT_SIZE) %
+			    prev->llc_stripe_size) {
+				args->lsa_rc = LSE_ALIGN_EXT;
+				goto out_err;
+			}
+		} else if (!(comp->llc_flags & LCME_FL_EXTENSION) &&
+			   (comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
+			if (comp->llc_extent.e_end != LUSTRE_EOF &&
+			    comp->llc_extent.e_end % comp->llc_stripe_size) {
+				args->lsa_rc = LSE_ALIGN_END;
+				goto out_err;
+			}
+		}
+	}
+
+	/* Components must have start == prev->end */
+	if (prev && comp->llc_extent.e_start != 0 &&
+	    comp->llc_extent.e_start != prev->llc_extent.e_end) {
+		args->lsa_rc = LSE_NOT_ADJACENT_PREV;
+		goto out_err;
+	}
+
+	/* Components must have start <= end */
+	if (comp->llc_extent.e_start > comp->llc_extent.e_end) {
+		args->lsa_rc = LSE_START_GT_END;
+		goto out_err;
+	}
+
+	return LLAPI_LAYOUT_ITER_CONT;
+
+out_err:
+	/* keep a more specific errno (e.g. ENOTSUP) if one was already set */
+	errno = errno ? errno : EINVAL;
+	return LLAPI_LAYOUT_ITER_STOP;
+}
+
+/**
+ * Print explanation of layout error.
+ *
+ * \param[in] error	code produced by the layout sanity check.  Values
+ *			outside [0, LSE_LAST) are reported as unrecognized.
+ */
+void llapi_layout_sanity_perror(int error)
+{
+	int recognized = (error >= 0 && error < LSE_LAST);
+
+	if (recognized) {
+		printf("Invalid layout: %s\n", llapi_layout_strerror[error]);
+		return;
+	}
+
+	printf("Invalid layout, unrecognized error: %d\n", error);
+}
+
+/**
+ * Walk a layout and enforce sanity checks that apply to > 1 component
+ *
+ * The core idea here is that of sanity checking individual tokens vs semantic
+ * checking.
+ * We cannot check everything at the individual component level ('token'),
+ * instead we must check whether or not the full layout has a valid meaning.
+ *
+ * An example of a component level check is "is stripe size valid?".  That is
+ * handled when setting stripe size.
+ *
+ * An example of a layout level check is "are the extents of these components
+ * valid when adjacent to one another", or "can we set these flags on adjacent
+ * components"?
+ *
+ * The current component of @layout is saved on entry and restored before
+ * returning.
+ *
+ * \param[in] layout		component layout list.
+ * \param[in] incomplete	if layout is complete or not - some checks can
+ *				only be done on complete layouts.
+ * \param[in] flr		set when this is called from FLR mirror create
+ *
+ * \retval	0 on success (also when @layout is NULL or has no current
+ *		component); a positive value for the various layout errors,
+ *		see llapi_layout_sanity_perror(); -1 on failure.
+ */
+int llapi_layout_sanity(struct llapi_layout *layout, bool incomplete, bool flr)
+{
+	/* every member is given a defined value, including the ones the
+	 * callback does not currently look at */
+	struct llapi_layout_sanity_args args = {
+		.lsa_incomplete = incomplete,
+		.lsa_flr = flr,
+		.lsa_ondisk = false,
+		.lsa_rc = 0,
+	};
+	struct llapi_layout_comp *curr;
+	int rc = 0;
+
+	if (!layout)
+		return 0;
+
+	curr = layout->llot_cur_comp;
+	if (!curr)
+		return 0;
+
+	/* When we modify an existing layout, this tells us if it's FLR */
+	if (mirror_id_of(curr->llc_id) > 0)
+		args.lsa_flr = true;
+
+	errno = 0;
+	rc = llapi_layout_comp_iterate(layout,
+				       llapi_layout_sanity_cb,
+				       &args);
+	/* ENOENT left in errno after the walk is not reported as an error */
+	if (errno == ENOENT)
+		errno = 0;
+
+	/* anything but a complete walk reports what the callback recorded */
+	if (rc != LLAPI_LAYOUT_ITER_CONT)
+		rc = args.lsa_rc;
+
+	/* the iteration moved the cursor; put it back where the caller
+	 * left it */
+	layout->llot_cur_comp = curr;
+
+	return rc;
+}