Whamcloud - gitweb
LU-10966 utils: Fix `lfs check` documentation and arguments
[fs/lustre-release.git] / lustre / utils / liblustreapi_layout.c
index ae5a074..f84b3b3 100644 (file)
@@ -23,7 +23,7 @@
  * Lustre files while hiding details of the internal data structures
  * from the user.
  *
- * Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2016, 2017, Intel Corporation.
  *
  * Author: Ned Bass <bass6@llnl.gov>
  */
@@ -34,6 +34,7 @@
 #include <unistd.h>
 #include <errno.h>
 #include <limits.h>
+#include <assert.h>
 #include <sys/xattr.h>
 #include <sys/param.h>
 
@@ -59,6 +60,7 @@ struct llapi_layout_comp {
        struct lu_extent        llc_extent;     /* [start, end) of component */
        uint32_t                llc_id;         /* unique ID of component */
        uint32_t                llc_flags;      /* LCME_FL_* flags */
+       uint64_t                llc_timestamp;  /* snapshot timestamp */
        struct list_head        llc_list;       /* linked to the llapi_layout
                                                   components list */
 };
@@ -147,6 +149,7 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
                        ent = &comp_v1->lcm_entries[i];
                        __swab32s(&ent->lcme_id);
                        __swab32s(&ent->lcme_flags);
+                       __swab64s(&ent->lcme_timestamp);
                        __swab64s(&ent->lcme_extent.e_start);
                        __swab64s(&ent->lcme_extent.e_end);
                        __swab32s(&ent->lcme_offset);
@@ -357,15 +360,16 @@ struct llapi_layout *llapi_layout_alloc(void)
  * Convert the data from a lov_user_md to a newly allocated llapi_layout.
  * The caller is responsible for freeing the returned pointer.
  *
- * \param[in] lum      LOV user metadata structure to copy data from
- * \param[in] lum_size size the the lum passed in
+ * \param[in] lov_xattr                LOV user metadata xattr to copy data from
+ * \param[in] lov_xattr_size   size of the lov_xattr passed in
  *
  * \retval             valid llapi_layout pointer on success
  * \retval             NULL if memory allocation fails
  */
-static struct llapi_layout *
-llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
+struct llapi_layout *llapi_layout_get_by_xattr(const void *lov_xattr,
+                                              ssize_t lov_xattr_size)
 {
+       const struct lov_user_md *lum = lov_xattr;
        struct lov_comp_md_v1 *comp_v1 = NULL;
        struct lov_comp_md_entry_v1 *ent;
        struct lov_user_md *v1;
@@ -389,6 +393,14 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
                   lum->lmm_magic == LOV_MAGIC_V3) {
                ent_count = 1;
                layout->llot_is_composite = false;
+
+               if (lov_xattr_size <= 0) {
+                       errno = EINVAL;
+                       goto error;
+               }
+       } else {
+               errno = EOPNOTSUPP;
+               goto error;
        }
 
        if (ent_count == 0) {
@@ -402,12 +414,12 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
                        ent = &comp_v1->lcm_entries[i];
                        v1 = (struct lov_user_md *)((char *)comp_v1 +
                                ent->lcme_offset);
-                       lum_size = ent->lcme_size;
+                       lov_xattr_size = ent->lcme_size;
                } else {
                        ent = NULL;
                }
 
-               obj_count = llapi_layout_objects_in_lum(v1, lum_size);
+               obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
                comp = __llapi_comp_alloc(obj_count);
                if (comp == NULL)
                        goto error;
@@ -417,6 +429,8 @@ llapi_layout_from_lum(const struct lov_user_md *lum, int lum_size)
                        comp->llc_extent.e_end = ent->lcme_extent.e_end;
                        comp->llc_id = ent->lcme_id;
                        comp->llc_flags = ent->lcme_flags;
+                       if (comp->llc_flags & LCME_FL_NOSYNC)
+                               comp->llc_timestamp = ent->lcme_timestamp;
                } else {
                        comp->llc_extent.e_start = 0;
                        comp->llc_extent.e_end = LUSTRE_EOF;
@@ -621,6 +635,8 @@ llapi_layout_to_lum(const struct llapi_layout *layout)
                        ent = &comp_v1->lcm_entries[ent_idx];
                        ent->lcme_id = comp->llc_id;
                        ent->lcme_flags = comp->llc_flags;
+                       if (ent->lcme_flags & LCME_FL_NOSYNC)
+                               ent->lcme_timestamp = comp->llc_timestamp;
                        ent->lcme_extent.e_start = comp->llc_extent.e_start;
                        ent->lcme_extent.e_end = comp->llc_extent.e_end;
                        ent->lcme_size = blob_size;
@@ -877,7 +893,7 @@ struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags)
                goto out;
        }
 
-       layout = llapi_layout_from_lum(lum, bytes_read);
+       layout = llapi_layout_get_by_xattr(lum, bytes_read);
 out:
        free(lum);
        return layout;
@@ -1430,7 +1446,7 @@ int llapi_layout_pool_name_set(struct llapi_layout *layout,
  *
  * \param[in] path             name of the file to open
  * \param[in] open_flags       open() flags
- * \param[in] mode             permissions to create new file with
+ * \param[in] mode             permissions to create file, filtered by umask
  * \param[in] layout           layout to create new file with
  *
  * \retval             non-negative file descriptor on successful open
@@ -1538,6 +1554,32 @@ int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
        return 0;
 }
 
+/**
+ * Map the FLR state bits of composite layout flags to a short name.
+ *
+ * Only the bits selected by LCM_FL_FLR_MASK are examined.
+ *
+ * \param[in] flags    composite layout flags
+ *
+ * \retval             "ro" for LCM_FL_RDONLY
+ * \retval             "wp" for LCM_FL_WRITE_PENDING
+ * \retval             "sp" for LCM_FL_SYNC_PENDING
+ * \retval             "0" for any other value
+ */
+const char *llapi_layout_flags_string(uint32_t flags)
+{
+       const uint32_t flr_state = flags & LCM_FL_FLR_MASK;
+
+       if (flr_state == LCM_FL_RDONLY)
+               return "ro";
+       if (flr_state == LCM_FL_WRITE_PENDING)
+               return "wp";
+       if (flr_state == LCM_FL_SYNC_PENDING)
+               return "sp";
+
+       return "0";
+}
+
+/**
+ * Convert a short FLR state name into the matching LCM_FL_* layout flag.
+ *
+ * A non-empty prefix of a state name is still accepted (e.g. "r" for "ro").
+ * NULL and the empty string are rejected: strncmp() with a length of zero
+ * always compares equal, so an empty string would otherwise be reported as
+ * LCM_FL_RDONLY.
+ *
+ * \param[in] string   state name: "ro", "wp" or "sp"
+ *
+ * \retval             LCM_FL_RDONLY, LCM_FL_WRITE_PENDING or
+ *                     LCM_FL_SYNC_PENDING on a match
+ * \retval             0 if \a string is NULL, empty or not recognized
+ */
+const __u16 llapi_layout_string_flags(char *string)
+{
+       size_t len;
+
+       if (string == NULL || string[0] == '\0')
+               return 0;
+
+       len = strlen(string);
+       if (strncmp(string, "ro", len) == 0)
+               return LCM_FL_RDONLY;
+       if (strncmp(string, "wp", len) == 0)
+               return LCM_FL_WRITE_PENDING;
+       if (strncmp(string, "sp", len) == 0)
+               return LCM_FL_SYNC_PENDING;
+
+       return 0;
+}
+
 /**
  * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
  * @count: Mirror count value to be checked.
@@ -1660,7 +1702,7 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout,
        if (comp->llc_list.prev != &layout->llot_comp_list) {
                prev = list_entry(comp->llc_list.prev, typeof(*prev),
                                  llc_list);
-               if (start != prev->llc_extent.e_end) {
+               if (start != 0 && start != prev->llc_extent.e_end) {
                        errno = EINVAL;
                        return -1;
                }
@@ -1669,7 +1711,8 @@ int llapi_layout_comp_extent_set(struct llapi_layout *layout,
        if (comp->llc_list.next != &layout->llot_comp_list) {
                next = list_entry(comp->llc_list.next, typeof(*next),
                                  llc_list);
-               if (end != next->llc_extent.e_start) {
+               if (next->llc_extent.e_start != 0 &&
+                   end != next->llc_extent.e_start) {
                        errno = EINVAL;
                        return -1;
                }
@@ -1848,6 +1891,37 @@ int llapi_layout_comp_add(struct llapi_layout *layout)
 
        return 0;
 }
+/**
+ * Append the first component of a new mirror to \a layout.
+ *
+ * The new component starts at offset 0 and is linked at the tail of the
+ * component list.  It becomes the current component of \a layout, and
+ * \a layout is marked as composite whether or not it was before.
+ *
+ * \param[in] layout           existing composite or plain layout
+ *
+ * \retval     0 on success
+ * \retval     -1 if the current component lookup or the allocation fails
+ */
+int llapi_layout_add_first_comp(struct llapi_layout *layout)
+{
+       struct llapi_layout_comp *mirror_head;
+
+       /* the result itself is not needed, only whether the lookup works */
+       if (__llapi_layout_cur_comp(layout) == NULL)
+               return -1;
+
+       mirror_head = __llapi_comp_alloc(0);
+       if (mirror_head == NULL)
+               return -1;
+
+       mirror_head->llc_extent.e_start = 0;
+       list_add_tail(&mirror_head->llc_list, &layout->llot_comp_list);
+
+       layout->llot_is_composite = true;
+       layout->llot_cur_comp = mirror_head;
+
+       return 0;
+}
 
 /**
  * Deletes current component from the composite layout. The component
@@ -2119,13 +2193,100 @@ out:
  * comp->lcme_id value, which must be an unique component ID. The new
  * attributes are passed in by @comp and @valid is used to specify which
  * attributes in the component are going to be changed.
+ *
+ * \param[in] path     path name of the file
+ * \param[in] ids      An array of component IDs
+ * \param[in] flags    flags: LCME_FL_* or;
+ *                     negative flags: (LCME_FL_NEG|LCME_FL_*)
+ * \param[in] count    Number of elements in ids and flags array
  */
-int llapi_layout_file_comp_set(const char *path,
-                              const struct llapi_layout *comp,
-                              uint32_t valid)
+int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
+                              size_t count)
 {
-       errno = EOPNOTSUPP;
-       return -1;
+       int rc = -1, fd = -1;
+       size_t lum_size;
+       size_t i;
+       struct llapi_layout *layout;
+       struct llapi_layout_comp *comp;
+       struct lov_user_md *lum = NULL;
+
+       if (path == NULL) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       /* nothing to change */
+       if (!count)
+               return 0;
+
+       /* both arrays are read for every one of the count entries below */
+       if (ids == NULL || flags == NULL) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       /* validate every request before anything is allocated */
+       for (i = 0; i < count; i++) {
+               if (!ids[i] || !flags[i]) {
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               /* do not allow to set or clear INIT flag */
+               if (flags[i] & LCME_FL_INIT) {
+                       errno = EINVAL;
+                       return -1;
+               }
+       }
+
+       /* build a temporary composite layout that carries only the
+        * component IDs and the flags requested for them */
+       layout = __llapi_layout_alloc();
+       if (layout == NULL)
+               return -1;
+
+       layout->llot_is_composite = true;
+       for (i = 0; i < count; i++) {
+               comp = __llapi_comp_alloc(0);
+               if (comp == NULL)
+                       goto out;
+
+               comp->llc_id = ids[i];
+               comp->llc_flags = flags[i];
+
+               list_add_tail(&comp->llc_list, &layout->llot_comp_list);
+               layout->llot_cur_comp = comp;
+       }
+
+       lum = llapi_layout_to_lum(layout);
+       if (lum == NULL)
+               goto out;
+
+       lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
+
+       fd = open(path, O_RDWR);
+       if (fd < 0)
+               goto out;
+
+       /* flush cached pages from clients */
+       rc = llapi_file_flush(fd);
+       if (rc) {
+               errno = -rc;
+               rc = -1;
+               goto out_close;
+       }
+
+       rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
+       if (rc < 0)
+               goto out_close;
+
+       rc = 0;
+
+out_close:
+       if (fd >= 0) {
+               /* close() may overwrite errno; keep the value to report */
+               int tmp_errno = errno;
+               close(fd);
+               errno = tmp_errno;
+       }
+out:
+       free(lum);
+       llapi_layout_free(layout);
+       return rc;
 }
 
 /**
@@ -2142,6 +2303,47 @@ bool llapi_layout_is_composite(struct llapi_layout *layout)
 }
 
 /**
+ * Iterate over every component in @layout and call the callback function @cb.
+ *
+ * \param[in] layout   component layout list.
+ * \param[in] cb       callback for each component
+ * \param[in] cbdata   callback data
+ *
+ * \retval < 0                         error happens during the iteration
+ * \retval LLAPI_LAYOUT_ITER_CONT      finished the iteration w/o error
+ * \retval LLAPI_LAYOUT_ITER_STOP      got something, stop the iteration
+ */
+int llapi_layout_comp_iterate(struct llapi_layout *layout,
+                             llapi_layout_iter_cb cb, void *cbdata)
+{
+       int rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
+
+       if (rc < 0)
+               return rc;
+
+       /* a successful USE_FIRST is expected to report exactly 0 */
+       assert(rc == 0);
+
+       for (;;) {
+               /* any result other than ITER_CONT ends the walk and is
+                * handed back to the caller unchanged */
+               rc = cb(layout, cbdata);
+               if (rc != LLAPI_LAYOUT_ITER_CONT)
+                       return rc;
+
+               rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
+               if (rc < 0)
+                       return rc;
+
+               /* reached the last comp */
+               if (rc == 1)
+                       return LLAPI_LAYOUT_ITER_CONT;
+       }
+}
+
+/**
  * llapi_layout_merge() - Merge a composite layout into another one.
  * @dst_layout: Destination composite layout.
  * @src_layout: Source composite layout.
@@ -2237,11 +2439,8 @@ int llapi_mirror_find_stale(struct llapi_layout *layout,
        int rc;
 
        rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
-       if (rc < 0) {
-               fprintf(stderr, "%s: move to the first layout component: %s.\n",
-                       __func__, strerror(errno));
+       if (rc < 0)
                goto error;
-       }
 
        while (rc == 0) {
                uint32_t id;
@@ -2250,21 +2449,15 @@ int llapi_mirror_find_stale(struct llapi_layout *layout,
                uint64_t start, end;
 
                rc = llapi_layout_comp_flags_get(layout, &flags);
-               if (rc < 0) {
-                       fprintf(stderr, "llapi_layout_comp_flags_get: %s.\n",
-                               strerror(errno));
+               if (rc < 0)
                        goto error;
-               }
 
                if (!(flags & LCME_FL_STALE))
                        goto next;
 
                rc = llapi_layout_mirror_id_get(layout, &mirror_id);
-               if (rc < 0) {
-                       fprintf(stderr, "llapi_layout_mirror_id_get: %s.\n",
-                               strerror(errno));
+               if (rc < 0)
                        goto error;
-               }
 
                /* the caller only wants stale components from specific
                 * mirrors */
@@ -2279,21 +2472,19 @@ int llapi_mirror_find_stale(struct llapi_layout *layout,
                        /* not in the specified mirror */
                        if (j == ids_nr)
                                goto next;
+               } else if (flags & LCME_FL_NOSYNC) {
+                       /* if not specified mirrors, do not resync "nosync"
+                        * mirrors */
+                       goto next;
                }
 
                rc = llapi_layout_comp_id_get(layout, &id);
-               if (rc < 0) {
-                       fprintf(stderr, "llapi_layout_comp_id_get: %s.\n",
-                               strerror(errno));
+               if (rc < 0)
                        goto error;
-               }
 
                rc = llapi_layout_comp_extent_get(layout, &start, &end);
-               if (rc < 0) {
-                       fprintf(stderr, "llapi_layout_comp_extent_get: %s.\n",
-                               strerror(errno));
+               if (rc < 0)
                        goto error;
-               }
 
                /* pack this component into @comp array */
                comp[idx].lrc_id = id;
@@ -2303,8 +2494,6 @@ int llapi_mirror_find_stale(struct llapi_layout *layout,
                idx++;
 
                if (idx >= comp_size) {
-                       fprintf(stderr, "%s: resync_comp array too small.\n",
-                               __func__);
                        rc = -EINVAL;
                        goto error;
                }
@@ -2312,8 +2501,6 @@ int llapi_mirror_find_stale(struct llapi_layout *layout,
        next:
                rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
                if (rc < 0) {
-                       fprintf(stderr, "%s: move to the next layout "
-                               "component: %s.\n", __func__, strerror(errno));
                        rc = -EINVAL;
                        goto error;
                }
@@ -2323,9 +2510,9 @@ error:
 }
 
 /* locate @layout to a valid component covering file [file_start, file_end) */
-static uint32_t llapi_mirror_find(struct llapi_layout *layout,
-                                 uint64_t file_start, uint64_t file_end,
-                                 uint64_t *endp)
+uint32_t llapi_mirror_find(struct llapi_layout *layout,
+                          uint64_t file_start, uint64_t file_end,
+                          uint64_t *endp)
 {
        uint32_t mirror_id = 0;
        int rc;
@@ -2378,12 +2565,21 @@ static uint32_t llapi_mirror_find(struct llapi_layout *layout,
        return mirror_id;
 }
 
-ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout,
-                               uint32_t dst, uint64_t start, uint64_t end)
+/**
+ * Copy file data starting at \a start from a source mirror to the
+ * components listed in \a comp_array.
+ *
+ * Each chunk is read with llapi_mirror_read() from the mirror returned by
+ * llapi_mirror_find() into a page-aligned 4MB buffer, then written with
+ * llapi_mirror_write() to every element of \a comp_array whose
+ * [lrc_start, lrc_end) range overlaps the chunk.  lrc_synced of every
+ * element is expected to be false on entry; on a 0 return it tells whether
+ * all writes to that component (and the final truncate, done when the end
+ * position is not page aligned) succeeded.
+ *
+ * \param[in] fd               file descriptor handed to the llapi_mirror_*()
+ *                             calls
+ * \param[in] layout           layout searched for a source mirror
+ * \param[in,out] comp_array   components to resync; lrc_synced is updated
+ * \param[in] comp_size        number of elements in \a comp_array
+ * \param[in] start            file offset to start copying from
+ * \param[in] end              end offset, or OBD_OBJECT_EOF
+ *
+ * \retval     0 on success, even if only some components were synced
+ * \retval     <0 negative errno if the buffer cannot be allocated, no source
+ *             mirror is found or reading fails; in the latter two cases
+ *             lrc_synced is false for every element of \a comp_array
+ */
+int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
+                            struct llapi_resync_comp *comp_array,
+                            int comp_size,  uint64_t start, uint64_t end)
 {
-       uint64_t mirror_end = 0;
-       ssize_t result = 0;
        size_t count;
+       size_t page_size = sysconf(_SC_PAGESIZE);
+       const size_t buflen = 4 << 20; /* 4M */
+       void *buf;
+       uint64_t pos = start;
+       int i;
+       int rc;
+
+       rc = posix_memalign(&buf, page_size, buflen);
+       if (rc)
+               return -rc;
 
        if (end == OBD_OBJECT_EOF)
                count = OBD_OBJECT_EOF;
@@ -2392,36 +2588,98 @@ ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout,
 
        while (count > 0) {
                uint32_t src;
-               size_t to_copy;
-               ssize_t copied;
+               uint64_t mirror_end = 0;
+               ssize_t bytes_read;
+               size_t to_read;
+               size_t to_write;
 
-               src = llapi_mirror_find(layout, start, end, &mirror_end);
-               if (src == 0) {
-                       fprintf(stderr, "llapi_mirror_find cannot find "
-                               "component covering %lu.\n", start);
-                       return -ENOENT;
+               src = llapi_mirror_find(layout, pos, end, &mirror_end);
+               if (src == 0) {
+                       /* leave through the common exit path so that buf is
+                        * freed and the lrc_synced markers are reset */
+                       rc = -ENOENT;
+                       break;
+               }
+
+               if (mirror_end == OBD_OBJECT_EOF) {
+                       to_read = count;
+               } else {
+                       to_read = MIN(count, mirror_end - pos);
+                       to_read = (to_read + page_size - 1) & ~(page_size - 1);
                }
+               to_read = MIN(buflen, to_read);
 
-               if (mirror_end == OBD_OBJECT_EOF)
-                       to_copy = count;
-               else
-                       to_copy = MIN(count, mirror_end - start);
+               bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
+               if (bytes_read == 0) {
+                       /* end of file */
+                       break;
+               }
+               if (bytes_read < 0) {
+                       rc = bytes_read;
+                       break;
+               }
 
-               copied = llapi_mirror_copy(fd, src, dst, start, to_copy);
-               if (copied < 0) {
-                       fprintf(stderr, "llapi_mirror_copy returned %zd.\n",
-                               copied);
-                       return copied;
+               /* round up to page align to make direct IO happy. */
+               to_write = (bytes_read + page_size - 1) & ~(page_size - 1);
+
+               for (i = 0; i < comp_size; i++) {
+                       ssize_t written;
+                       off_t pos2 = pos;
+                       size_t to_write2 = to_write;
+
+                       /* skip non-overlapped component */
+                       if (pos >= comp_array[i].lrc_end ||
+                           pos + to_write <= comp_array[i].lrc_start)
+                               continue;
+
+                       if (pos < comp_array[i].lrc_start)
+                               pos2 = comp_array[i].lrc_start;
+
+                       to_write2 -= pos2 - pos;
+
+                       if ((pos + to_write) > comp_array[i].lrc_end)
+                               to_write2 -= pos + to_write -
+                                            comp_array[i].lrc_end;
+
+                       written = llapi_mirror_write(fd,
+                                       comp_array[i].lrc_mirror_id,
+                                       buf + pos2 - pos,
+                                       to_write2, pos2);
+                       if (written < 0) {
+                               /**
+                                * this component is not written successfully,
+                                * mark it using its lrc_synced, it is supposed
+                                * to be false before getting here.
+                                *
+                                * And before this function returns, all
+                                * elements of comp_array will reverse their
+                                * lrc_synced flag to reflect their true
+                                * meanings.
+                                */
+                               comp_array[i].lrc_synced = true;
+                               continue;
+                       }
+                       assert(written == to_write2);
                }
 
-               result += copied;
-               if (copied < to_copy) /* end of file */
-                       break;
+               pos += bytes_read;
+               count -= bytes_read;
+       }
+
+       free(buf);
+
+       if (rc < 0) {
+               for (i = 0; i < comp_size; i++)
+                       comp_array[i].lrc_synced = false;
+               return rc;
+       }
 
-               if (count != OBD_OBJECT_EOF)
-                       count -= copied;
-               start += copied;
+       for (i = 0; i < comp_size; i++) {
+               comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
+               if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
+                       rc = llapi_mirror_truncate(fd,
+                                       comp_array[i].lrc_mirror_id, pos);
+                       if (rc < 0)
+                               comp_array[i].lrc_synced = false;
+               }
        }
 
-       return result;
+       /* partially successful is successful */
+       return 0;
 }