struct lu_extent llc_extent; /* [start, end) of component */
uint32_t llc_id; /* unique ID of component */
uint32_t llc_flags; /* LCME_FL_* flags */
+ uint64_t llc_timestamp; /* snapshot timestamp */
struct list_head llc_list; /* linked to the llapi_layout
components list */
};
ent = &comp_v1->lcm_entries[i];
__swab32s(&ent->lcme_id);
__swab32s(&ent->lcme_flags);
+ __swab64s(&ent->lcme_timestamp);
__swab64s(&ent->lcme_extent.e_start);
__swab64s(&ent->lcme_extent.e_end);
__swab32s(&ent->lcme_offset);
}
/**
+ * Check if the given \a lum_size is large enough to hold the required
+ * fields in \a lum.
+ *
+ * \param[in] lum the struct lov_user_md to check
+ * \param[in] lum_size the number of bytes in \a lum
+ *
+ * \retval true the \a lum_size is too small
+ * \retval false the \a lum_size is large enough
+ */
+static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
+{
+	uint32_t raw_magic;
+	uint32_t native_magic;
+
+	/* The magic field itself must be present before it can be read. */
+	if (lum_size < sizeof(lum->lmm_magic))
+		return true;
+
+	/* Map the on-buffer magic, in either byte order, onto the native
+	 * constant so the size check below only deals with one form. */
+	raw_magic = lum->lmm_magic;
+	if (raw_magic == LOV_MAGIC_V1 ||
+	    raw_magic == __swab32(LOV_MAGIC_V1))
+		native_magic = LOV_MAGIC_V1;
+	else if (raw_magic == LOV_MAGIC_V3 ||
+		 raw_magic == __swab32(LOV_MAGIC_V3))
+		native_magic = LOV_MAGIC_V3;
+	else if (raw_magic == LOV_MAGIC_COMP_V1 ||
+		 raw_magic == __swab32(LOV_MAGIC_COMP_V1))
+		native_magic = LOV_MAGIC_COMP_V1;
+	else
+		return true;	/* unrecognised magic is reported as truncated */
+
+	/* Composite layouts only need room for the fixed header here. */
+	if (native_magic == LOV_MAGIC_COMP_V1)
+		return lum_size < sizeof(struct lov_comp_md_v1);
+
+	/* Plain V1/V3 layouts need the header with zero objects. */
+	return lum_size < lov_user_md_size(0, native_magic);
+}
+
+/**
+ * Verify that the object count in \a lum is consistent with the stripe
+ * count in \a lum.  It applies to regular files only.
+ *
+ * Only native-endian magics are accepted, so \a lum is expected to have
+ * been byte-swapped to host order already.
+ *
+ * For a composite layout the entry table and every component blob must
+ * also lie entirely within the first \a lum_size bytes; otherwise the
+ * layout is rejected rather than read out of bounds.
+ *
+ * \param[in] lum	layout to verify
+ * \param[in] lum_size	number of valid bytes in \a lum
+ *
+ * \retval true		the layout is self-consistent
+ * \retval false	unknown magic, a component that does not fit in
+ *			\a lum_size, or an object count mismatch
+ */
+static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
+{
+	struct lov_comp_md_v1 *comp_v1 = NULL;
+	struct lov_comp_md_entry_v1 *ent = NULL;
+	size_t total_size;
+	int i, ent_count, obj_count;
+
+	if (lum_size < 0 || (size_t)lum_size < sizeof(lum->lmm_magic))
+		return false;
+	total_size = lum_size;
+
+	if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
+		comp_v1 = (struct lov_comp_md_v1 *)lum;
+		if (total_size < sizeof(*comp_v1))
+			return false;
+		ent_count = comp_v1->lcm_entry_count;
+		/* The whole entry table must fit before it is walked. */
+		if ((total_size - sizeof(*comp_v1)) /
+		    sizeof(comp_v1->lcm_entries[0]) < (size_t)ent_count)
+			return false;
+	} else if (lum->lmm_magic == LOV_MAGIC_V1 ||
+		   lum->lmm_magic == LOV_MAGIC_V3) {
+		ent_count = 1;
+	} else {
+		return false;
+	}
+
+	for (i = 0; i < ent_count; i++) {
+		if (comp_v1) {
+			ent = &comp_v1->lcm_entries[i];
+			/* lcme_offset is relative to the start of comp_v1;
+			 * widen before adding so the sum cannot wrap. */
+			if ((uint64_t)ent->lcme_offset + ent->lcme_size >
+			    (uint64_t)total_size)
+				return false;
+			lum = (struct lov_user_md *)((char *)comp_v1 +
+						     ent->lcme_offset);
+			lum_size = ent->lcme_size;
+		}
+		obj_count = llapi_layout_objects_in_lum(lum, lum_size);
+
+		if (ent != NULL) {
+			/* A component without LCME_FL_INIT must not carry
+			 * any objects. */
+			if (!(ent->lcme_flags & LCME_FL_INIT) &&
+			    obj_count != 0)
+				return false;
+		} else if (obj_count != lum->lmm_stripe_count) {
+			return false;
+		}
+	}
+	return true;
+}
+
+/**
* Convert the data from a lov_user_md to a newly allocated llapi_layout.
* The caller is responsible for freeing the returned pointer.
*
- * \param[in] lum LOV user metadata structure to copy data from
- * \param[in] lum_size size the the lum passed in
+ * \param[in] lov_xattr LOV user metadata xattr to copy data from
+ * \param[in] lov_xattr_size size in bytes of the \a lov_xattr buffer
+ * \param[in] flags bitwise-or'd flags to control the behavior
*
* \retval valid llapi_layout pointer on success
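- * \retval NULL if memory allocation fails
+ * \retval NULL on failure: invalid arguments, a truncated or unsupported
+ *		layout, a layout that fails validation, or memory
+ *		allocation failure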
*/
-static struct llapi_layout *
-llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
+struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
+ ssize_t lov_xattr_size,
+ uint32_t flags)
{
+ struct lov_user_md *lum = lov_xattr;
struct lov_comp_md_v1 *comp_v1 = NULL;
struct lov_comp_md_entry_v1 *ent;
struct lov_user_md *v1;
- struct llapi_layout *layout;
+ struct llapi_layout *layout = NULL;
struct llapi_layout_comp *comp;
int i, ent_count = 0, obj_count;
- layout = __llapi_layout_alloc();
- if (layout == NULL)
+ if (lov_xattr == NULL || lov_xattr_size <= 0) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* Return an error if we got back a partial layout. */
+ if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
+ errno = ERANGE;
return NULL;
+ }
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+ if (flags & LLAPI_LXF_COPY) {
+ lum = malloc(lov_xattr_size);
+ if (lum == NULL) {
+ errno = ENOMEM;
+ return NULL;
+ }
+ memcpy(lum, lov_xattr, lov_xattr_size);
+ }
+#endif
+
+ llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
+
+ if ((flags & LLAPI_LXF_CHECK) &&
+ !llapi_layout_lum_valid(lum, lov_xattr_size)) {
+ errno = EBADSLT;
+ goto out;
+ }
+
+ layout = __llapi_layout_alloc();
+ if (layout == NULL) {
+ errno = ENOMEM;
+ goto out;
+ }
if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
comp_v1 = (struct lov_comp_md_v1 *)lum;
lum->lmm_magic == LOV_MAGIC_V3) {
ent_count = 1;
layout->llot_is_composite = false;
+
+ if (lov_xattr_size <= 0) {
+ errno = EINVAL;
+ goto out_layout;
+ }
+ } else {
+ errno = EOPNOTSUPP;
+ goto out_layout;
}
if (ent_count == 0) {
errno = EINVAL;
- goto error;
+ goto out_layout;
}
v1 = (struct lov_user_md *)lum;
ent = &comp_v1->lcm_entries[i];
v1 = (struct lov_user_md *)((char *)comp_v1 +
ent->lcme_offset);
- lum_size = ent->lcme_size;
+ lov_xattr_size = ent->lcme_size;
} else {
ent = NULL;
}
- obj_count = llapi_layout_objects_in_lum(v1, lum_size);
+ obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
comp = __llapi_comp_alloc(obj_count);
if (comp == NULL)
- goto error;
+ goto out_layout;
if (ent != NULL) {
comp->llc_extent.e_start = ent->lcme_extent.e_start;
comp->llc_extent.e_end = ent->lcme_extent.e_end;
comp->llc_id = ent->lcme_id;
comp->llc_flags = ent->lcme_flags;
+ if (comp->llc_flags & LCME_FL_NOSYNC)
+ comp->llc_timestamp = ent->lcme_timestamp;
} else {
comp->llc_extent.e_start = 0;
comp->llc_extent.e_end = LUSTRE_EOF;
layout->llot_cur_comp = comp;
}
+out:
+ if (lum != lov_xattr)
+ free(lum);
return layout;
-error:
+out_layout:
llapi_layout_free(layout);
- return NULL;
+ layout = NULL;
+ goto out;
}
/**
ent = &comp_v1->lcm_entries[ent_idx];
ent->lcme_id = comp->llc_id;
ent->lcme_flags = comp->llc_flags;
+ if (ent->lcme_flags & LCME_FL_NOSYNC)
+ ent->lcme_timestamp = comp->llc_timestamp;
ent->lcme_extent.e_start = comp->llc_extent.e_start;
ent->lcme_extent.e_end = comp->llc_extent.e_end;
ent->lcme_size = blob_size;
}
/**
- * Check if the given \a lum_size is large enough to hold the required
- * fields in \a lum.
- *
- * \param[in] lum the struct lov_user_md to check
- * \param[in] lum_size the number of bytes in \a lum
- *
- * \retval true the \a lum_size is too small
- * \retval false the \a lum_size is large enough
- */
-static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
-{
- uint32_t magic;
-
- if (lum_size < sizeof(lum->lmm_magic))
- return true;
-
- if (lum->lmm_magic == LOV_MAGIC_V1 ||
- lum->lmm_magic == __swab32(LOV_MAGIC_V1))
- magic = LOV_MAGIC_V1;
- else if (lum->lmm_magic == LOV_MAGIC_V3 ||
- lum->lmm_magic == __swab32(LOV_MAGIC_V3))
- magic = LOV_MAGIC_V3;
- else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
- lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
- magic = LOV_MAGIC_COMP_V1;
- else
- return true;
-
- if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
- return lum_size < lov_user_md_size(0, magic);
- else
- return lum_size < sizeof(struct lov_comp_md_v1);
-}
-
-/* Verify if the objects count in lum is consistent with the
- * stripe count in lum. It applies to regular file only. */
-static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
-{
- struct lov_comp_md_v1 *comp_v1 = NULL;
- int i, ent_count, obj_count;
-
- if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
- comp_v1 = (struct lov_comp_md_v1 *)lum;
- ent_count = comp_v1->lcm_entry_count;
- } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
- lum->lmm_magic == LOV_MAGIC_V3) {
- ent_count = 1;
- } else {
- return false;
- }
-
- for (i = 0; i < ent_count; i++) {
- if (comp_v1) {
- lum = (struct lov_user_md *)((char *)comp_v1 +
- comp_v1->lcm_entries[i].lcme_offset);
- lum_size = comp_v1->lcm_entries[i].lcme_size;
- }
- obj_count = llapi_layout_objects_in_lum(lum, lum_size);
-
- if (comp_v1) {
- if (!(comp_v1->lcm_entries[i].lcme_flags &
- LCME_FL_INIT) && obj_count != 0)
- return false;
- } else if (obj_count != lum->lmm_stripe_count) {
- return false;
- }
- }
- return true;
-}
-
-/**
* Get the striping layout for the file referenced by file descriptor \a fd.
*
* If the filesystem does not support the "lustre." xattr namespace, the
goto out;
}
- /* Return an error if we got back a partial layout. */
- if (llapi_layout_lum_truncated(lum, bytes_read)) {
- errno = EINTR;
- goto out;
- }
-
- llapi_layout_swab_lov_user_md(lum, bytes_read);
-
/* Directories may have a positive non-zero lum->lmm_stripe_count
* yet have an empty lum->lmm_objects array. For non-directories the
* amount of data returned from the kernel must be consistent
if (fstat(fd, &st) < 0)
goto out;
- if (!S_ISDIR(st.st_mode) && !llapi_layout_lum_valid(lum, bytes_read)) {
- errno = EINTR;
- goto out;
- }
-
- layout = llapi_layout_from_lum(lum, bytes_read);
+ layout = llapi_layout_get_by_xattr(lum, bytes_read,
+ S_ISDIR(st.st_mode) ? 0 : LLAPI_LXF_CHECK);
out:
free(lum);
return layout;
/* not in the specified mirror */
if (j == ids_nr)
goto next;
+ } else if (flags & LCME_FL_NOSYNC) {
+ /* if no mirrors were specified, do not resync "nosync"
+ * mirrors */
+ goto next;
}
rc = llapi_layout_comp_id_get(layout, &id);
struct llapi_resync_comp *comp_array,
int comp_size, uint64_t start, uint64_t end)
{
- size_t count;
+ uint64_t count;
size_t page_size = sysconf(_SC_PAGESIZE);
const size_t buflen = 4 << 20; /* 4M */
void *buf;
while (count > 0) {
uint32_t src;
uint64_t mirror_end = 0;
+ uint64_t bytes_left;
ssize_t bytes_read;
size_t to_read;
size_t to_write;
return -ENOENT;
if (mirror_end == OBD_OBJECT_EOF) {
- to_read = count;
+ bytes_left = count;
} else {
- to_read = MIN(count, mirror_end - pos);
- to_read = (to_read + page_size - 1) & ~(page_size - 1);
+ bytes_left = MIN(count, mirror_end - pos);
+ bytes_left = ((bytes_left - 1) | (page_size - 1)) + 1;
}
- to_read = MIN(buflen, to_read);
+ to_read = MIN(buflen, bytes_left);
bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
if (bytes_read == 0) {
}
/* round up to page align to make direct IO happy. */
- to_write = (bytes_read + page_size - 1) & ~(page_size - 1);
+ to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
for (i = 0; i < comp_size; i++) {
ssize_t written;
+ off_t pos2 = pos;
+ size_t to_write2 = to_write;
/* skip non-overlapped component */
- if (pos > comp_array[i].lrc_end ||
- pos + to_write < comp_array[i].lrc_start)
+ if (pos >= comp_array[i].lrc_end ||
+ pos + to_write <= comp_array[i].lrc_start)
continue;
+ if (pos < comp_array[i].lrc_start)
+ pos2 = comp_array[i].lrc_start;
+
+ to_write2 -= pos2 - pos;
+
+ if ((pos + to_write) > comp_array[i].lrc_end)
+ to_write2 -= pos + to_write -
+ comp_array[i].lrc_end;
+
written = llapi_mirror_write(fd,
- comp_array[i].lrc_mirror_id, buf,
- to_write, pos);
+ comp_array[i].lrc_mirror_id,
+ buf + pos2 - pos,
+ to_write2, pos2);
if (written < 0) {
/**
* this component is not written successfully,
comp_array[i].lrc_synced = true;
continue;
}
- assert(written == to_write);
+ assert(written == to_write2);
}
pos += bytes_read;