Whamcloud - gitweb
LU-10916 lfs: improve lfs mirror resync 08/32808/4
authorBobi Jam <bobijam@whamcloud.com>
Wed, 11 Jul 2018 16:24:27 +0000 (10:24 -0600)
committerOleg Drokin <green@whamcloud.com>
Mon, 6 Aug 2018 14:36:02 +0000 (14:36 +0000)
Make mirror resync use read+write+write+... mode instead of doing the
resync on each stale mirror of a file separately (read+write,
read+write, ...).

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: I627fa53fcfde4811b2cd9c84c8545defe151206c
Reviewed-on: https://review.whamcloud.com/32808
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre/lustreapi.h
lustre/tests/mirror_io.c
lustre/utils/lfs.c
lustre/utils/liblustreapi_layout.c
lustre/utils/liblustreapi_mirror.c

index 59f7129..aa9a290 100644 (file)
@@ -516,11 +516,18 @@ struct llapi_resync_comp {
 /** Opaque data type abstracting the layout of a Lustre file. */
 struct llapi_layout;
 
+int llapi_mirror_truncate(int fd, unsigned int id, off_t length);
+ssize_t llapi_mirror_write(int fd, unsigned int id, const void *buf,
+                          size_t count, off_t pos);
+uint32_t llapi_mirror_find(struct llapi_layout *layout,
+                          uint64_t file_start, uint64_t file_end,
+                          uint64_t *endp);
 int llapi_mirror_find_stale(struct llapi_layout *layout,
                struct llapi_resync_comp *comp, size_t comp_size,
                __u16 *mirror_ids, int ids_nr);
-ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout,
-                               uint32_t dst, uint64_t start, uint64_t end);
+int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
+                            struct llapi_resync_comp *comp_array,
+                            int comp_size,  uint64_t start, uint64_t end);
 /*
  * Flags to control how layouts are retrieved.
  */
index 5deaeba..903a4f7 100644 (file)
@@ -359,6 +359,48 @@ static enum resync_errors resync_parse_error(const char *err)
        return -1;
 }
 
+ssize_t mirror_resync_one(int fd, struct llapi_layout *layout,
+                         uint32_t dst, uint64_t start, uint64_t end)
+{ /* copy [start, end) to mirror dst; returns bytes copied, <0 on error */
+       uint64_t mirror_end = 0; /* passed as endp to llapi_mirror_find() */
+       ssize_t result = 0; /* running total of bytes copied */
+       size_t count; /* bytes left to copy, or OBD_OBJECT_EOF */
+
+       if (end == OBD_OBJECT_EOF) /* no fixed end: run until a short copy */
+               count = OBD_OBJECT_EOF;
+       else
+               count = end - start;
+
+       while (count > 0) { /* each pass looks up a source mirror at start */
+               uint32_t src;
+               size_t to_copy;
+               ssize_t copied;
+
+               src = llapi_mirror_find(layout, start, end, &mirror_end);
+               if (src == 0) /* presumably: no valid source mirror found */
+                       return -ENOENT;
+
+               if (mirror_end == OBD_OBJECT_EOF)
+                       to_copy = count;
+               else /* cap the chunk at mirror_end */
+                       to_copy = MIN(count, mirror_end - start);
+
+               copied = llapi_mirror_copy(fd, src, dst, start, to_copy);
+               if (copied < 0) /* pass the negative value back unchanged */
+                       return copied;
+
+               result += copied;
+               if (copied < to_copy) /* end of file */
+                       break;
+
+               if (count != OBD_OBJECT_EOF) /* the EOF sentinel stays put */
+                       count -= copied;
+               start += copied;
+       }
+
+       return result;
+}
+
 static void mirror_resync(int argc, char *argv[])
 {
        const char *fname;
@@ -464,8 +506,8 @@ static void mirror_resync(int argc, char *argv[])
                }
                printf("\b\n");
 
-               res = llapi_mirror_resync_one(fd, layout, mirror_id,
-                                             comp_array[idx].lrc_start, end);
+               res = mirror_resync_one(fd, layout, mirror_id,
+                                       comp_array[idx].lrc_start, end);
                if (res > 0) {
                        int j;
 
index 8c7e07d..e062e34 100644 (file)
@@ -7970,6 +7970,8 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
        struct llapi_layout *layout;
        struct stat stbuf;
        uint32_t flr_state;
+       uint64_t start;
+       uint64_t end;
        int comp_size = 0;
        int idx;
        int fd;
@@ -8042,51 +8044,29 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
                goto free_layout;
        }
 
-       idx = 0;
-       while (idx < comp_size) {
-               ssize_t result;
-               uint64_t end;
-               __u16 mirror_id;
-               int i;
-
-               rc = llapi_lease_check(fd);
-               if (rc != LL_LEASE_WRLCK) {
-                       fprintf(stderr, "%s: '%s' lost lease lock.\n",
-                               progname, fname);
-                       goto free_layout;
-               }
-
-               mirror_id = comp_array[idx].lrc_mirror_id;
-               end = comp_array[idx].lrc_end;
-
-               /* try to combine adjacent component */
-               for (i = idx + 1; i < comp_size; i++) {
-                       if (mirror_id != comp_array[i].lrc_mirror_id ||
-                           end != comp_array[i].lrc_start)
-                               break;
-                       end = comp_array[i].lrc_end;
-               }
-
-               result = llapi_mirror_resync_one(fd, layout, mirror_id,
-                                                comp_array[idx].lrc_start,
-                                                end);
-               if (result < 0) {
-                       fprintf(stderr, "%s: '%s' llapi_mirror_resync_one: "
-                               "%ld.\n", progname, fname, result);
-                       rc = result;
-                       goto unlock;
-               } else if (result > 0) {
-                       int j;
-
-                       /* mark synced components */
-                       for (j = idx; j < i; j++)
-                               comp_array[j].lrc_synced = true;
-               }
+       /* get the read range [start, end) */
+       start = comp_array[0].lrc_start;
+       end = comp_array[0].lrc_end;
+       for (idx = 1; idx < comp_size; idx++) {
+               if (comp_array[idx].lrc_start < start)
+                       start = comp_array[idx].lrc_start;
+               if (end < comp_array[idx].lrc_end)
+                       end = comp_array[idx].lrc_end;
+       }
 
-               idx = i;
+       rc = llapi_lease_check(fd);
+       if (rc != LL_LEASE_WRLCK) {
+               fprintf(stderr, "%s: '%s' lost lease lock.\n",
+                       progname, fname);
+               goto free_layout;
        }
 
-unlock:
+       rc = llapi_mirror_resync_many(fd, layout, comp_array, comp_size,
+                                     start, end);
+       if (rc < 0)
+               fprintf(stderr, "%s: '%s' llapi_mirror_resync_many: %d.\n",
+                       progname, fname, rc);
+
        /* prepare ioc for lease put */
        ioc->lil_mode = LL_LEASE_UNLCK;
        ioc->lil_flags = LL_LEASE_RESYNC_DONE;
index e85b39d..a282d7a 100644 (file)
@@ -2491,9 +2491,9 @@ error:
 }
 
 /* locate @layout to a valid component covering file [file_start, file_end) */
-static uint32_t llapi_mirror_find(struct llapi_layout *layout,
-                                 uint64_t file_start, uint64_t file_end,
-                                 uint64_t *endp)
+uint32_t llapi_mirror_find(struct llapi_layout *layout,
+                          uint64_t file_start, uint64_t file_end,
+                          uint64_t *endp)
 {
        uint32_t mirror_id = 0;
        int rc;
@@ -2546,12 +2546,21 @@ static uint32_t llapi_mirror_find(struct llapi_layout *layout,
        return mirror_id;
 }
 
-ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout,
-                               uint32_t dst, uint64_t start, uint64_t end)
+int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
+                            struct llapi_resync_comp *comp_array,
+                            int comp_size,  uint64_t start, uint64_t end)
 {
-       uint64_t mirror_end = 0;
-       ssize_t result = 0;
        size_t count;
+       size_t page_size = sysconf(_SC_PAGESIZE);
+       const size_t buflen = 4 << 20; /* 4M */
+       void *buf;
+       uint64_t pos = start;
+       int i;
+       int rc;
+
+       rc = posix_memalign(&buf, page_size, buflen);
+       if (rc)
+               return -rc;
 
        if (end == OBD_OBJECT_EOF)
                count = OBD_OBJECT_EOF;
@@ -2560,30 +2569,86 @@ ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout,
 
        while (count > 0) {
                uint32_t src;
-               size_t to_copy;
-               ssize_t copied;
+               uint64_t mirror_end = 0;
+               ssize_t bytes_read;
+               size_t to_read;
+               size_t to_write;
 
-               src = llapi_mirror_find(layout, start, end, &mirror_end);
+               src = llapi_mirror_find(layout, pos, end, &mirror_end);
                if (src == 0)
                        return -ENOENT;
 
-               if (mirror_end == OBD_OBJECT_EOF)
-                       to_copy = count;
-               else
-                       to_copy = MIN(count, mirror_end - start);
-
-               copied = llapi_mirror_copy(fd, src, dst, start, to_copy);
-               if (copied < 0)
-                       return copied;
+               if (mirror_end == OBD_OBJECT_EOF) {
+                       to_read = count;
+               } else {
+                       to_read = MIN(count, mirror_end - pos);
+                       to_read = (to_read + page_size - 1) & ~(page_size - 1);
+               }
+               to_read = MIN(buflen, to_read);
 
-               result += copied;
-               if (copied < to_copy) /* end of file */
+               bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
+               if (bytes_read == 0) {
+                       /* end of file */
+                       break;
+               }
+               if (bytes_read < 0) {
+                       rc = bytes_read;
                        break;
+               }
+
+               /* round up to page align to make direct IO happy. */
+               to_write = (bytes_read + page_size - 1) & ~(page_size - 1);
+
+               for (i = 0; i < comp_size; i++) {
+                       ssize_t written;
+
+                       /* skip non-overlapped component */
+                       if (pos > comp_array[i].lrc_end ||
+                           pos + to_write < comp_array[i].lrc_start)
+                               continue;
+
+                       written = llapi_mirror_write(fd,
+                                       comp_array[i].lrc_mirror_id, buf,
+                                       to_write, pos);
+                       if (written < 0) {
+                               /**
+                                * this component is not written successfully,
+                                * mark it using its lrc_synced, it is supposed
+                                * to be false before getting here.
+                                *
+                                * And before this function returns, all
+                                * elements of comp_array will reverse their
+                                * lrc_synced flag to reflect their true
+                                * meanings.
+                                */
+                               comp_array[i].lrc_synced = true;
+                               continue;
+                       }
+                       assert(written == to_write);
+               }
 
-               if (count != OBD_OBJECT_EOF)
-                       count -= copied;
-               start += copied;
+               pos += bytes_read;
+               count -= bytes_read;
        }
 
-       return result;
+       free(buf);
+
+       if (rc < 0) {
+               for (i = 0; i < comp_size; i++)
+                       comp_array[i].lrc_synced = false;
+               return rc;
+       }
+
+       for (i = 0; i < comp_size; i++) {
+               comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
+               if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
+                       rc = llapi_mirror_truncate(fd,
+                                       comp_array[i].lrc_mirror_id, pos);
+                       if (rc < 0)
+                               comp_array[i].lrc_synced = false;
+               }
+       }
+
+       /* partially successful is successful */
+       return 0;
 }
index 464b9fb..4a59b46 100644 (file)
@@ -147,8 +147,8 @@ ssize_t llapi_mirror_read(int fd, unsigned int id, void *buf, size_t count,
        return result;
 }
 
-static ssize_t llapi_mirror_write(int fd, unsigned int id,
-                                  const void *buf, size_t count, off_t pos)
+ssize_t llapi_mirror_write(int fd, unsigned int id, const void *buf,
+                          size_t count, off_t pos)
 {
        size_t page_size = sysconf(_SC_PAGESIZE);
        ssize_t result = 0;
@@ -186,7 +186,7 @@ static ssize_t llapi_mirror_write(int fd, unsigned int id,
        return result;
 }
 
-static int llapi_mirror_truncate(int fd, unsigned int id, off_t length)
+int llapi_mirror_truncate(int fd, unsigned int id, off_t length)
 {
        int rc;