From 0e5c12ac29a9622e8ca05d5e39cd5e2a721ace93 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Wed, 11 Jul 2018 10:24:27 -0600 Subject: [PATCH] LU-10916 lfs: improve lfs mirror resync Make mirror resync use read+write+write+... mode instead of doing the resync on each stale mirror of a file separately (read+write, read+write, ...). Signed-off-by: Bobi Jam Change-Id: I627fa53fcfde4811b2cd9c84c8545defe151206c Reviewed-on: https://review.whamcloud.com/32808 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustreapi.h | 11 +++- lustre/tests/mirror_io.c | 46 ++++++++++++++- lustre/utils/lfs.c | 64 ++++++++------------- lustre/utils/liblustreapi_layout.c | 113 +++++++++++++++++++++++++++++-------- lustre/utils/liblustreapi_mirror.c | 6 +- 5 files changed, 167 insertions(+), 73 deletions(-) diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index 59f7129..aa9a290 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -516,11 +516,18 @@ struct llapi_resync_comp { /** Opaque data type abstracting the layout of a Lustre file. */ struct llapi_layout; +int llapi_mirror_truncate(int fd, unsigned int id, off_t length); +ssize_t llapi_mirror_write(int fd, unsigned int id, const void *buf, + size_t count, off_t pos); +uint32_t llapi_mirror_find(struct llapi_layout *layout, + uint64_t file_start, uint64_t file_end, + uint64_t *endp); int llapi_mirror_find_stale(struct llapi_layout *layout, struct llapi_resync_comp *comp, size_t comp_size, __u16 *mirror_ids, int ids_nr); -ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout, - uint32_t dst, uint64_t start, uint64_t end); +int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, + struct llapi_resync_comp *comp_array, + int comp_size, uint64_t start, uint64_t end); /* * Flags to control how layouts are retrieved. 
*/ diff --git a/lustre/tests/mirror_io.c b/lustre/tests/mirror_io.c index 5deaeba..903a4f7 100644 --- a/lustre/tests/mirror_io.c +++ b/lustre/tests/mirror_io.c @@ -359,6 +359,48 @@ static enum resync_errors resync_parse_error(const char *err) return -1; } +ssize_t mirror_resync_one(int fd, struct llapi_layout *layout, + uint32_t dst, uint64_t start, uint64_t end) +{ + uint64_t mirror_end = 0; + ssize_t result = 0; + size_t count; + + if (end == OBD_OBJECT_EOF) + count = OBD_OBJECT_EOF; + else + count = end - start; + + while (count > 0) { + uint32_t src; + size_t to_copy; + ssize_t copied; + + src = llapi_mirror_find(layout, start, end, &mirror_end); + if (src == 0) + return -ENOENT; + + if (mirror_end == OBD_OBJECT_EOF) + to_copy = count; + else + to_copy = MIN(count, mirror_end - start); + + copied = llapi_mirror_copy(fd, src, dst, start, to_copy); + if (copied < 0) + return copied; + + result += copied; + if (copied < to_copy) /* end of file */ + break; + + if (count != OBD_OBJECT_EOF) + count -= copied; + start += copied; + } + + return result; +} + static void mirror_resync(int argc, char *argv[]) { const char *fname; @@ -464,8 +506,8 @@ static void mirror_resync(int argc, char *argv[]) } printf("\b\n"); - res = llapi_mirror_resync_one(fd, layout, mirror_id, - comp_array[idx].lrc_start, end); + res = mirror_resync_one(fd, layout, mirror_id, + comp_array[idx].lrc_start, end); if (res > 0) { int j; diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 8c7e07d..e062e34 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -7970,6 +7970,8 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc, struct llapi_layout *layout; struct stat stbuf; uint32_t flr_state; + uint64_t start; + uint64_t end; int comp_size = 0; int idx; int fd; @@ -8042,51 +8044,29 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc, goto free_layout; } - idx = 0; - while (idx < comp_size) { - ssize_t result; - uint64_t end; - __u16 mirror_id; - 
int i; - - rc = llapi_lease_check(fd); - if (rc != LL_LEASE_WRLCK) { - fprintf(stderr, "%s: '%s' lost lease lock.\n", - progname, fname); - goto free_layout; - } - - mirror_id = comp_array[idx].lrc_mirror_id; - end = comp_array[idx].lrc_end; - - /* try to combine adjacent component */ - for (i = idx + 1; i < comp_size; i++) { - if (mirror_id != comp_array[i].lrc_mirror_id || - end != comp_array[i].lrc_start) - break; - end = comp_array[i].lrc_end; - } - - result = llapi_mirror_resync_one(fd, layout, mirror_id, - comp_array[idx].lrc_start, - end); - if (result < 0) { - fprintf(stderr, "%s: '%s' llapi_mirror_resync_one: " - "%ld.\n", progname, fname, result); - rc = result; - goto unlock; - } else if (result > 0) { - int j; - - /* mark synced components */ - for (j = idx; j < i; j++) - comp_array[j].lrc_synced = true; - } + /* get the read range [start, end) */ + start = comp_array[0].lrc_start; + end = comp_array[0].lrc_end; + for (idx = 1; idx < comp_size; idx++) { + if (comp_array[idx].lrc_start < start) + start = comp_array[idx].lrc_start; + if (end < comp_array[idx].lrc_end) + end = comp_array[idx].lrc_end; + } - idx = i; + rc = llapi_lease_check(fd); + if (rc != LL_LEASE_WRLCK) { + fprintf(stderr, "%s: '%s' lost lease lock.\n", + progname, fname); + goto free_layout; } -unlock: + rc = llapi_mirror_resync_many(fd, layout, comp_array, comp_size, + start, end); + if (rc < 0) + fprintf(stderr, "%s: '%s' llapi_mirror_resync_many: %d.\n", + progname, fname, rc); + /* prepare ioc for lease put */ ioc->lil_mode = LL_LEASE_UNLCK; ioc->lil_flags = LL_LEASE_RESYNC_DONE; diff --git a/lustre/utils/liblustreapi_layout.c b/lustre/utils/liblustreapi_layout.c index e85b39d..a282d7a 100644 --- a/lustre/utils/liblustreapi_layout.c +++ b/lustre/utils/liblustreapi_layout.c @@ -2491,9 +2491,9 @@ error: } /* locate @layout to a valid component covering file [file_start, file_end) */ -static uint32_t llapi_mirror_find(struct llapi_layout *layout, - uint64_t file_start, uint64_t 
file_end, - uint64_t *endp) +uint32_t llapi_mirror_find(struct llapi_layout *layout, + uint64_t file_start, uint64_t file_end, + uint64_t *endp) { uint32_t mirror_id = 0; int rc; @@ -2546,12 +2546,21 @@ static uint32_t llapi_mirror_find(struct llapi_layout *layout, return mirror_id; } -ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout, - uint32_t dst, uint64_t start, uint64_t end) +int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, + struct llapi_resync_comp *comp_array, + int comp_size, uint64_t start, uint64_t end) { - uint64_t mirror_end = 0; - ssize_t result = 0; size_t count; + size_t page_size = sysconf(_SC_PAGESIZE); + const size_t buflen = 4 << 20; /* 4M */ + void *buf; + uint64_t pos = start; + int i; + int rc; + + rc = posix_memalign(&buf, page_size, buflen); + if (rc) + return -rc; if (end == OBD_OBJECT_EOF) count = OBD_OBJECT_EOF; @@ -2560,30 +2569,86 @@ ssize_t llapi_mirror_resync_one(int fd, struct llapi_layout *layout, while (count > 0) { uint32_t src; - size_t to_copy; - ssize_t copied; + uint64_t mirror_end = 0; + ssize_t bytes_read; + size_t to_read; + size_t to_write; - src = llapi_mirror_find(layout, start, end, &mirror_end); + src = llapi_mirror_find(layout, pos, end, &mirror_end); if (src == 0) return -ENOENT; - if (mirror_end == OBD_OBJECT_EOF) - to_copy = count; - else - to_copy = MIN(count, mirror_end - start); - - copied = llapi_mirror_copy(fd, src, dst, start, to_copy); - if (copied < 0) - return copied; + if (mirror_end == OBD_OBJECT_EOF) { + to_read = count; + } else { + to_read = MIN(count, mirror_end - pos); + to_read = (to_read + page_size - 1) & ~(page_size - 1); + } + to_read = MIN(buflen, to_read); - result += copied; - if (copied < to_copy) /* end of file */ + bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos); + if (bytes_read == 0) { + /* end of file */ + break; + } + if (bytes_read < 0) { + rc = bytes_read; break; + } + + /* round up to page align to make direct IO happy. 
*/ + to_write = (bytes_read + page_size - 1) & ~(page_size - 1); + + for (i = 0; i < comp_size; i++) { + ssize_t written; + + /* skip non-overlapped component */ + if (pos > comp_array[i].lrc_end || + pos + to_write < comp_array[i].lrc_start) + continue; + + written = llapi_mirror_write(fd, + comp_array[i].lrc_mirror_id, buf, + to_write, pos); + if (written < 0) { + /** + * this component is not written successfully, + * mark it using its lrc_synced, it is supposed + * to be false before getting here. + * + * And before this function returns, all + * elements of comp_array will reverse their + * lrc_synced flag to reflect their true + * meanings. + */ + comp_array[i].lrc_synced = true; + continue; + } + assert(written == to_write); + } - if (count != OBD_OBJECT_EOF) - count -= copied; - start += copied; + pos += bytes_read; + count -= bytes_read; } - return result; + free(buf); + + if (rc < 0) { + for (i = 0; i < comp_size; i++) + comp_array[i].lrc_synced = false; + return rc; + } + + for (i = 0; i < comp_size; i++) { + comp_array[i].lrc_synced = !comp_array[i].lrc_synced; + if (comp_array[i].lrc_synced && pos & (page_size - 1)) { + rc = llapi_mirror_truncate(fd, + comp_array[i].lrc_mirror_id, pos); + if (rc < 0) + comp_array[i].lrc_synced = false; + } + } + + /* partially successful is successful */ + return 0; } diff --git a/lustre/utils/liblustreapi_mirror.c b/lustre/utils/liblustreapi_mirror.c index 464b9fb..4a59b46 100644 --- a/lustre/utils/liblustreapi_mirror.c +++ b/lustre/utils/liblustreapi_mirror.c @@ -147,8 +147,8 @@ ssize_t llapi_mirror_read(int fd, unsigned int id, void *buf, size_t count, return result; } -static ssize_t llapi_mirror_write(int fd, unsigned int id, - const void *buf, size_t count, off_t pos) +ssize_t llapi_mirror_write(int fd, unsigned int id, const void *buf, + size_t count, off_t pos) { size_t page_size = sysconf(_SC_PAGESIZE); ssize_t result = 0; @@ -186,7 +186,7 @@ static ssize_t llapi_mirror_write(int fd, unsigned int id, return 
result; } -static int llapi_mirror_truncate(int fd, unsigned int id, off_t length) +int llapi_mirror_truncate(int fd, unsigned int id, off_t length) { int rc; -- 1.8.3.1