From be131d125ad128187da970963c543fb9144c947a Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Wed, 2 Aug 2023 14:46:28 +0300 Subject: [PATCH] LU-16974 utils: lfs mirror resync to show progress lfs mirror resync should be able to: - show progress like lfs mirror extend --stats does - throttle like lfs mirror extend -W does use 64MB buffer for mirror resync by default. Change-Id: Ibe60748542ff4a3731aa6a4a9907be82427a0ae9 Signed-off-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51750 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Timothy Day Reviewed-by: Oleg Drokin --- lustre/doc/lfs-mirror-extend.1 | 18 +++++ lustre/doc/lfs-mirror-resync.1 | 18 +++++ lustre/include/lustre/lustreapi.h | 6 ++ lustre/tests/sanity.sh | 64 +++++++++++++++ lustre/utils/lfs.c | 156 ++++++++++++++++++++++--------------- lustre/utils/liblustreapi_layout.c | 146 +++++++++++++++++++++++++++++++++- 6 files changed, 341 insertions(+), 67 deletions(-) diff --git a/lustre/doc/lfs-mirror-extend.1 b/lustre/doc/lfs-mirror-extend.1 index 1e00df4..c646a04 100644 --- a/lustre/doc/lfs-mirror-extend.1 +++ b/lustre/doc/lfs-mirror-extend.1 @@ -5,6 +5,9 @@ lfs-mirror-extend \- add mirror(s) to an existing file .B lfs mirror extend [\fB\-\-no\-verify\fR] <\fB\-\-mirror\-count\fR|\fB\-N\fR[\fImirror_count\fR]> +<\fB\-\-bandwidth\-limit=\fR|\fB\-W\fR[\fIbandwidth_limit\fR]> +[\fB\-\-stats\fR] +<\fB\-\-stats\-interval\fR=\fIinterval_in_sec\fR> [\fIsetstripe_options\fR|\fB\-f\fR <\fIvictim_file\fR>] ... <\fIfilename\fR> .SH DESCRIPTION @@ -63,6 +66,21 @@ option. .BR \-\-no\-verify This option indicates not to verify the mirror(s) from victim file(s) in case the victim file(s) contains the same data as the original mirrored file. +.TP +.BR \-\-stats +This option enables progress updates every 5 seconds, in YAML format. +.TP +.BR \-\-stats-interval=\fIstats_interval +This option enables progress updates every \fIstats_interval\fR seconds, in YAML format. +.TP +.BR \-\-bandwidth\-limit\fR|\fB\-W\fR\fIbandwidth_limit\fR +This option enables throttling so that mirror extending writes no more than +\fIbandwidth_limit\fR bytes a second. An optional suffix can be used to +specify the units in +.BR K ilobytes, +.BR M egabytes +or +.BR G igabytes. .SH EXAMPLES .TP .B lfs mirror extend -N2 /mnt/lustre/file1 diff --git a/lustre/doc/lfs-mirror-resync.1 b/lustre/doc/lfs-mirror-resync.1 index 34f4cac..f492c97 100644 --- a/lustre/doc/lfs-mirror-resync.1 +++ b/lustre/doc/lfs-mirror-resync.1 @@ -4,6 +4,9 @@ lfs-mirror-resync \- resynchronize an out-of-sync mirrored file .SH SYNOPSIS .B lfs mirror resync [\fB\-\-only\fR <\fImirror_id\fR[,...]>] +<\fB\-\-bandwidth\-limit=\fR|\fB\-W\fR[\fIbandwidth_limit\fR]> +[\fB\-\-stats\fR] +<\fB\-\-stats\-interval\fR=\fIinterval_in_sec\fR> <\fImirrored_file\fR> [<\fImirrored_file2\fR>...] .SH DESCRIPTION This command resynchronizes out-of-sync mirrored file(s) specified by the path @@ -27,6 +30,21 @@ This option indicates which mirror(s) specified by \fImirror_id\fR(s) needs to be resynchronized. The \fImirror_id\fR is the numerical unique identifier for a mirror. Multiple \fImirror_id\fRs are separated by comma. This option cannot be used when multiple mirrored files are specified. +.TP +.BR \-\-stats +This option enables progress updates every 5 seconds, in YAML format. +.TP +.BR \-\-stats-interval=\fIstats_interval +This option enables progress updates every \fIstats_interval\fR seconds, in YAML format. +.TP +.BR \-\-bandwidth\-limit\fR|\fB\-W\fR\fIbandwidth_limit\fR +This option enables throttling so that mirror extending writes no more than +\fIbandwidth_limit\fR bytes a second. An optional suffix can be used to +specify the units in +.BR K ilobytes, +.BR M egabytes +or +.BR G igabytes. .SH EXAMPLES .TP .B lfs mirror resync /mnt/lustre/file1 /mnt/lustre/file2 diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index b98882f..8a23b9b 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -726,6 +726,12 @@ int llapi_layout_mirror_inherit(struct llapi_layout *f_layout, int llapi_mirror_find_stale(struct llapi_layout *layout, struct llapi_resync_comp *comp, size_t comp_size, __u16 *mirror_ids, int ids_nr); +int llapi_mirror_resync_many_params(int fd, struct llapi_layout *layout, + struct llapi_resync_comp *comp_array, + int comp_size, uint64_t start, + uint64_t end, + unsigned long stats_interval_sec, + unsigned long bandwidth_bytes_sec); int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, struct llapi_resync_comp *comp_array, int comp_size, uint64_t start, uint64_t end); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b345dbc..b659904 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8318,6 +8318,70 @@ test_56xj() { # LU-16571 "lfs migrate -b" can cause thread starvation on OSS } run_test 56xj "lfs migrate -b should not cause starvation of threads on OSS" +test_56xk() { + (( $OSTCOUNT >= 2 )) || skip "needs >= 2 OSTs" + + local size_mb=5 + local file1=$DIR/$tfile + + stack_trap "rm -f $file1" + $LFS setstripe -c 1 $file1 + dd if=/dev/zero of=$file1 bs=1M count=$size_mb || + error "error creating $file1" + $LFS mirror extend -N $file1 || error "can't mirror" + dd if=/dev/zero of=$file1 bs=4k count=1 conv=notrunc || + error "can't dd" + $LFS getstripe $file1 | grep stale || + error "one component must be stale" + + local start=$SECONDS + $LFS mirror resync --stats --stats-interval=1 -W 1M $file1 || + error "migrate failed rc = $?" + local elapsed=$((SECONDS - start)) + $LFS getstripe $file1 | grep stale && + error "all components must be sync" + + # with 1MB/s, elapsed should equal size_mb + (( elapsed >= size_mb * 95 / 100 )) || + error "'lfs mirror resync -W' too fast ($elapsed < 0.95 * $size_mb)?" + + (( elapsed <= size_mb * 120 / 100 )) || + error_not_in_vm "'lfs mirror resync -W' slow ($elapsed > 1.2 * $size_mb)" + + (( elapsed <= size_mb * 350 / 100 )) || + error "'lfs mirror resync -W' too slow in VM ($elapsed > 3.5 * $size_mb)" +} +run_test 56xk "lfs mirror resync bandwidth limitation support" + +test_56xl() { + (( $OSTCOUNT >= 2 )) || skip "needs >= 2 OSTs" + verify_yaml_available || skip_env "YAML verification not installed" + + local size_mb=5 + local file1=$DIR/$tfile.1 + local output_file=$DIR/$tfile.out + + stack_trap "rm -f $file1" + $LFS setstripe -c 1 $file1 + dd if=/dev/zero of=$file1 bs=1M count=$size_mb || + error "error creating $file1" + $LFS mirror extend -N $file1 || error "can't mirror" + dd if=/dev/zero of=$file1 bs=4k count=1 conv=notrunc || + error "can't dd" + $LFS getstripe $file1 | grep stale || + error "one component must be stale" + $LFS getstripe $file1 + + $LFS mirror resync --stats --stats-interval=1 $file1 >$output_file || + error "resync failed rc = $?" + $LFS getstripe $file1 | grep stale && + error "all components must be sync" + + cat $output_file + cat $output_file | verify_yaml || error "stats is not valid YAML" +} +run_test 56xl "lfs mirror resync stats support" + test_56y() { [ $MDS1_VERSION -lt $(version_code 2.4.53) ] && skip "No HSM $(lustre_build_version $SINGLEMDS) MDS < 2.4.53" diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 1e10042..e8695a5 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -136,15 +136,9 @@ static int lfs_pcc_detach_fid(int argc, char **argv); static int lfs_pcc_state(int argc, char **argv); static int lfs_pcc(int argc, char **argv); -enum stats_flag { - STATS_ON, - STATS_OFF, -}; - static int lfs_migrate_to_dom(int fd_src, int fd_dst, char *name, __u64 migration_flags, unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec); struct pool_to_id_cbdata { @@ -258,8 +252,10 @@ command_t mirror_cmdlist[] = { { .pc_name = "extend", .pc_func = lfs_mirror_extend, .pc_help = "Extend a mirrored file.\n" "usage: lfs mirror extend " - "{--mirror-count|-N[MIRROR_COUNT]} [--no-verify] " - "[SETSTRIPE_OPTIONS|-f VICTIM_FILE] ... FILENAME ...\n" }, + "{--mirror-count|-N[MIRROR_COUNT]} [--no-verify]|\n" + "\t\t--stats|--stats-interval=|\n" + "\t\t--W |--bandwidth-limit=\n" + "\t\t[SETSTRIPE_OPTIONS|-f VICTIM_FILE] ... FILENAME ...\n" }, { .pc_name = "split", .pc_func = lfs_mirror_split, .pc_help = "Split a mirrored file.\n" "usage: lfs mirror split {--mirror-id MIRROR_ID |\n" @@ -279,7 +275,9 @@ command_t mirror_cmdlist[] = { "\t\t{--write-mirror|-o MIRROR_ID1[,...]} \n" }, { .pc_name = "resync", .pc_func = lfs_mirror_resync, .pc_help = "Resynchronizes out-of-sync mirrored file(s).\n" - "usage: lfs mirror resync [--only MIRROR_ID[,...]>]\n" + "usage: lfs mirror resync [--only MIRROR_ID[,...]>]|\n" + "\t\t--stats|--stats-interval=|\n" + "\t\t--W |--bandwidth-limit=\n" "\t\t [...]\n" }, { .pc_name = "verify", .pc_func = lfs_mirror_verify, .pc_help = "Verify mirrored file(s).\n" @@ -791,28 +789,29 @@ struct timespec timespec_sub(struct timespec *before, struct timespec *after) } static void stats_log(struct timespec *now, struct timespec *start_time, - enum stats_flag stats_flag, ssize_t read_bytes, size_t write_bytes, off_t file_size_bytes) { struct timespec diff = timespec_sub(start_time, now); - if (stats_flag == STATS_ON && ((diff.tv_sec != 0) || - (diff.tv_nsec != 0)) && file_size_bytes != 0) - printf("- { seconds: %li, rmbps: %5.2g, wmbps: %5.2g, copied: %lu, size: %lu, pct: %lu%% }\n", - diff.tv_sec, - (double) read_bytes/((ONE_MB * diff.tv_sec) + - ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)), - (double) write_bytes/((ONE_MB * diff.tv_sec) + - ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)), - write_bytes/ONE_MB, - file_size_bytes/ONE_MB, - ((write_bytes*100)/file_size_bytes)); + if (file_size_bytes == 0) + return; + + if (diff.tv_sec == 0 && diff.tv_nsec == 0) + return; + + printf("- { seconds: %li, rmbps: %5.2g, wmbps: %5.2g, copied: %lu, size: %lu, pct: %lu%% }\n", + diff.tv_sec, + (double) read_bytes/((ONE_MB * diff.tv_sec) + + ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)), + (double) write_bytes/((ONE_MB * diff.tv_sec) + + ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)), + write_bytes/ONE_MB, file_size_bytes/ONE_MB, + ((write_bytes*100)/file_size_bytes)); } static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int), unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec, off_t file_size_bytes) { struct llapi_layout *layout; @@ -954,7 +953,7 @@ static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int), } while (rc < 0 && errno == EINTR); if (rc < 0) { - if (stats_flag == STATS_OFF) + if (stats_interval_sec) fprintf(stderr, "error %s: delay for bandwidth control failed: %s\n", progname, @@ -965,10 +964,10 @@ static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int), } clock_gettime(CLOCK_REALTIME, &now); - if ((write_bytes != file_size_bytes) && + if (stats_interval_sec && (write_bytes != file_size_bytes) && (now.tv_sec >= last_bw_print.tv_sec + stats_interval_sec)) { - stats_log(&now, &start_time, stats_flag, + stats_log(&now, &start_time, read_bytes, write_bytes, file_size_bytes); last_bw_print = now; @@ -979,10 +978,11 @@ static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int), } /* Output at least one log, regardless of stats_interval */ - clock_gettime(CLOCK_REALTIME, &now); - stats_log(&now, &start_time, stats_flag, - read_bytes, write_bytes, - file_size_bytes); + if (stats_interval_sec) { + clock_gettime(CLOCK_REALTIME, &now); + stats_log(&now, &start_time, read_bytes, write_bytes, + file_size_bytes); + } rc = fsync(fd_dst); if (rc < 0) @@ -1008,7 +1008,6 @@ static int migrate_set_timestamps(int fd, const struct stat *st) static int migrate_block(int fd_src, int fd_dst, unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec) { struct stat st; @@ -1048,8 +1047,7 @@ static int migrate_block(int fd_src, int fd_dst, } rc = migrate_copy_data(fd_src, fd_dst, NULL, bandwidth_bytes_sec, - stats_flag, stats_interval_sec, - st.st_size); + stats_interval_sec, st.st_size); if (rc < 0) { error_loc = "data copy failed"; goto out_unlock; @@ -1111,7 +1109,6 @@ static int check_lease(int fd) static int migrate_nonblock(int fd_src, int fd_dst, unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec) { struct stat st; @@ -1132,7 +1129,7 @@ static int migrate_nonblock(int fd_src, int fd_dst, } rc = migrate_copy_data(fd_src, fd_dst, check_lease, - bandwidth_bytes_sec, stats_flag, + bandwidth_bytes_sec, stats_interval_sec, st.st_size); if (rc < 0) { error_loc = "data copy failed"; @@ -1340,7 +1337,7 @@ static int lfs_migrate(char *name, __u64 migration_flags, struct llapi_stripe_param *param, struct llapi_layout *layout, unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec) + long stats_interval_sec) { struct llapi_layout *existing; uint64_t dom_new, dom_cur; @@ -1377,15 +1374,16 @@ static int lfs_migrate(char *name, __u64 migration_flags, * if new layout used bigger DOM size, then mirroring is used */ if (dom_new > dom_cur) { - rc = lfs_migrate_to_dom(fd_src, fd_dst, name, migration_flags, - bandwidth_bytes_sec, stats_flag, + rc = lfs_migrate_to_dom(fd_src, fd_dst, name, + migration_flags, + bandwidth_bytes_sec, stats_interval_sec); if (rc) error_loc = "cannot migrate to DOM layout"; goto out_closed; } - if (stats_flag == STATS_ON) + if (stats_interval_sec) printf("%s:\n", name); if (!(migration_flags & LLAPI_MIGRATION_NONBLOCK)) { @@ -1396,7 +1394,7 @@ static int lfs_migrate(char *name, __u64 migration_flags, * atomic swap/close (LU-6785) */ rc = migrate_block(fd_src, fd_dst, bandwidth_bytes_sec, - stats_flag, stats_interval_sec); + stats_interval_sec); goto out; } @@ -1406,7 +1404,7 @@ static int lfs_migrate(char *name, __u64 migration_flags, goto out; } - rc = migrate_nonblock(fd_src, fd_dst, bandwidth_bytes_sec, stats_flag, + rc = migrate_nonblock(fd_src, fd_dst, bandwidth_bytes_sec, stats_interval_sec); if (rc < 0) { llapi_lease_release(fd_src); @@ -1956,7 +1954,6 @@ out: static int mirror_extend_layout(char *name, struct llapi_layout *m_layout, bool inherit, uint32_t flags, unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec) { struct llapi_layout *f_layout = NULL; @@ -2007,10 +2004,10 @@ static int mirror_extend_layout(char *name, struct llapi_layout *m_layout, goto out; } - if (stats_flag) + if (stats_interval_sec) printf("%s:\n", name); - rc = migrate_nonblock(fd_src, fd_dst, bandwidth_bytes_sec, stats_flag, + rc = migrate_nonblock(fd_src, fd_dst, bandwidth_bytes_sec, stats_interval_sec); if (rc < 0) { llapi_lease_release(fd_src); @@ -2060,7 +2057,7 @@ out: static int mirror_extend(char *fname, struct mirror_args *mirror_list, enum mirror_flags mirror_flags, unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec) + long stats_interval_sec) { int rc = 0; @@ -2077,7 +2074,6 @@ static int mirror_extend(char *fname, struct mirror_args *mirror_list, mirror_list->m_inherit, mirror_list->m_flags, bandwidth_bytes_sec, - stats_flag, stats_interval_sec); if (rc) break; @@ -2507,12 +2503,13 @@ free_layout: static inline int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc, - __u16 *mirror_ids, int ids_nr); + __u16 *mirror_ids, int ids_nr, + long stats_interval_sec, long bandwidth_bytes_sec); + static int lfs_migrate_to_dom(int fd_src, int fd_dst, char *name, __u64 migration_flags, unsigned long long bandwidth_bytes_sec, - enum stats_flag stats_flag, long stats_interval_sec) { struct ll_ioc_lease *data = NULL; @@ -2524,10 +2521,10 @@ static int lfs_migrate_to_dom(int fd_src, int fd_dst, char *name, goto out_close; } - if (stats_flag) + if (stats_interval_sec) printf("%s:\n", name); - rc = migrate_nonblock(fd_src, fd_dst, bandwidth_bytes_sec, stats_flag, + rc = migrate_nonblock(fd_src, fd_dst, bandwidth_bytes_sec, stats_interval_sec); if (rc < 0) goto out_release; @@ -2554,7 +2551,9 @@ static int lfs_migrate_to_dom(int fd_src, int fd_dst, char *name, close(fd_src); close(fd_dst); - rc = lfs_mirror_resync_file(name, data, NULL, 0); + rc = lfs_mirror_resync_file(name, data, NULL, 0, + stats_interval_sec, + bandwidth_bytes_sec); if (rc) { error_loc = "cannot resync file"; goto out; @@ -3569,8 +3568,7 @@ static int lfs_setstripe_internal(int argc, char **argv, mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; unsigned long long bandwidth_bytes_sec = 0; unsigned long long bandwidth_unit = ONE_MB; - enum stats_flag stats_flag = STATS_OFF; - long stats_interval_sec = 5; + long stats_interval_sec = 0; struct option long_opts[] = { /* find { .val = '0', .name = "null", .has_arg = no_argument }, */ @@ -3822,13 +3820,16 @@ static int lfs_setstripe_internal(int argc, char **argv, template = optarg; break; case LFS_STATS_OPT: - stats_flag = STATS_ON; + stats_interval_sec = 5; break; case LFS_STATS_INTERVAL_OPT: - stats_flag = STATS_ON; stats_interval_sec = strtol(optarg, &end, 0); - if (stats_interval_sec == 0) - stats_interval_sec = 5; + if (stats_interval_sec == 0 && errno) { + fprintf(stderr, + "%s %s: invalid stats interval %s\n", + progname, argv[0], optarg); + goto usage_error; + } break; case 'b': if (!migrate_mode) { @@ -4529,7 +4530,7 @@ static int lfs_setstripe_internal(int argc, char **argv, result = lfs_migrate(fname, migration_flags, param, layout, bandwidth_bytes_sec, - stats_flag, stats_interval_sec); + stats_interval_sec); } else if (comp_set != 0) { result = lfs_component_set(fname, comp_id, lsa.lsa_pool_name, @@ -4547,7 +4548,7 @@ static int lfs_setstripe_internal(int argc, char **argv, result = mirror_extend(fname, mirror_list, mirror_flags, bandwidth_bytes_sec, - stats_flag, stats_interval_sec); + stats_interval_sec); } else if (opc == SO_MIRROR_SPLIT || opc == SO_MIRROR_DELETE) { if (!mirror_id && !comp_id && !lsa.lsa_pool_name) { fprintf(stderr, @@ -11180,7 +11181,8 @@ error: static inline int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc, - __u16 *mirror_ids, int ids_nr) + __u16 *mirror_ids, int ids_nr, + long stats_interval_sec, long bandwidth_bytes_sec) { struct llapi_resync_comp comp_array[1024] = { { 0 } }; struct llapi_layout *layout; @@ -11279,8 +11281,9 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc, goto free_layout; } - rc = llapi_mirror_resync_many(fd, layout, comp_array, comp_size, - start, end); + rc = llapi_mirror_resync_many_params(fd, layout, comp_array, comp_size, + start, end, stats_interval_sec, + bandwidth_bytes_sec); if (rc < 0) fprintf(stderr, "%s: '%s' llapi_mirror_resync_many: %s.\n", progname, fname, strerror(-rc)); @@ -11335,14 +11338,24 @@ static inline int lfs_mirror_resync(int argc, char **argv) struct option long_opts[] = { { .val = 'h', .name = "help", .has_arg = no_argument }, { .val = 'o', .name = "only", .has_arg = required_argument }, + { .val = 'W', .name = "bandwidth", .has_arg = required_argument }, + { .val = LFS_STATS_OPT, + .name = "stats", .has_arg = no_argument}, + { .val = LFS_STATS_INTERVAL_OPT, + .name = "stats-interval", + .has_arg = required_argument}, { .name = NULL } }; struct ll_ioc_lease *ioc = NULL; __u16 mirror_ids[128] = { 0 }; + unsigned int stats_interval_sec = 0; + unsigned long long bandwidth_bytes_sec = 0; + unsigned long long bandwidth_unit = ONE_MB; int ids_nr = 0; int c; int rc = 0; - while ((c = getopt_long(argc, argv, "ho:", long_opts, NULL)) >= 0) { + while ((c = getopt_long(argc, argv, "ho:W:", long_opts, NULL)) >= 0) { + char *end; switch (c) { case 'o': rc = parse_mirror_ids(mirror_ids, @@ -11356,6 +11369,21 @@ static inline int lfs_mirror_resync(int argc, char **argv) } ids_nr = rc; break; + case 'W': + if (llapi_parse_size(optarg, &bandwidth_bytes_sec, + &bandwidth_unit, 0) < 0) { + fprintf(stderr, + "error: %s: bad value for bandwidth '%s'\n", + argv[0], optarg); + goto error; + } + break; + case LFS_STATS_OPT: + stats_interval_sec = 5; + break; + case LFS_STATS_INTERVAL_OPT: + stats_interval_sec = strtol(optarg, &end, 0); + break; default: fprintf(stderr, "%s: unrecognized option '%s'\n", progname, argv[optind - 1]); @@ -11397,7 +11425,9 @@ static inline int lfs_mirror_resync(int argc, char **argv) for (; optind < argc; optind++) { rc = lfs_mirror_resync_file(argv[optind], ioc, - mirror_ids, ids_nr); + mirror_ids, ids_nr, + stats_interval_sec, + bandwidth_bytes_sec); /* ignore previous file's error, continue with next file */ /* reset ioc */ diff --git a/lustre/utils/liblustreapi_layout.c b/lustre/utils/liblustreapi_layout.c index 47b354e..271e44b 100644 --- a/lustre/utils/liblustreapi_layout.c +++ b/lustre/utils/liblustreapi_layout.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -2930,12 +2932,59 @@ int llapi_mirror_find(struct llapi_layout *layout, uint64_t file_start, return mirror_id; } -int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, - struct llapi_resync_comp *comp_array, - int comp_size, uint64_t start, uint64_t end) +#ifndef NSEC_PER_SEC +# define NSEC_PER_SEC 1000000000UL +#endif +#define ONE_MB 0x100000 +static struct timespec timespec_sub(struct timespec *before, + struct timespec *after) +{ + struct timespec ret; + + ret.tv_sec = after->tv_sec - before->tv_sec; + if (after->tv_nsec < before->tv_nsec) { + ret.tv_sec--; + ret.tv_nsec = NSEC_PER_SEC + after->tv_nsec - before->tv_nsec; + } else { + ret.tv_nsec = after->tv_nsec - before->tv_nsec; + } + + return ret; +} + +static void stats_log(struct timespec *now, struct timespec *start_time, + ssize_t read_bytes, size_t write_bytes, + off_t file_size_bytes) +{ + struct timespec diff = timespec_sub(start_time, now); + + if (file_size_bytes == 0) + return; + + if (diff.tv_sec == 0 && diff.tv_nsec == 0) + return; + + llapi_printf(LLAPI_MSG_NORMAL, + "- { seconds: %li, rmbps: %5.2g, wmbps: %5.2g, copied: %lu, size: %lu, pct: %lu%% }\n", + diff.tv_sec, + (double) read_bytes/((ONE_MB * diff.tv_sec) + + ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)), + (double) write_bytes/((ONE_MB * diff.tv_sec) + + ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)), + write_bytes/ONE_MB, + file_size_bytes/ONE_MB, + ((write_bytes*100)/file_size_bytes)); +} + +int llapi_mirror_resync_many_params(int fd, struct llapi_layout *layout, + struct llapi_resync_comp *comp_array, + int comp_size, uint64_t start, + uint64_t end, + unsigned long stats_interval_sec, + unsigned long bandwidth_bytes_sec) { size_t page_size = sysconf(_SC_PAGESIZE); - const size_t buflen = 4 << 20; /* 4M */ + size_t buflen = 64 << 20; /* 64M */ void *buf; uint64_t pos = start; uint64_t data_off = pos, data_end = pos; @@ -2944,11 +2993,32 @@ int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, int i; int rc; int rc2 = 0; + struct timespec start_time; + struct timespec now; + struct timespec last_bw_print; + size_t total_bytes_read = 0; + size_t total_bytes_written = 0; + off_t write_estimation_bytes = 0; + + if (bandwidth_bytes_sec > 0 || stats_interval_sec) { + struct stat st; + rc = fstat(fd, &st); + if (rc < 0) + return -errno; + write_estimation_bytes = st.st_size * comp_size; + } + + /* limit transfer size to what can be sent in one second */ + if (bandwidth_bytes_sec && bandwidth_bytes_sec < buflen) + buflen = (bandwidth_bytes_sec + ONE_MB - 1) & ~(ONE_MB - 1); rc = posix_memalign(&buf, page_size, buflen); if (rc) return -rc; + clock_gettime(CLOCK_MONOTONIC, &start_time); + now = last_bw_print = start_time; + while (pos < end) { ssize_t bytes_read; size_t to_read; @@ -3058,11 +3128,14 @@ do_read: rc = bytes_read; break; } + total_bytes_read += bytes_read; /* round up to page align to make direct IO happy. */ to_write = ((bytes_read - 1) | (page_size - 1)) + 1; for (i = 0; i < comp_size; i++) { + unsigned long long write_target; + struct timespec diff; ssize_t written; off_t pos2 = pos; size_t to_write2 = to_write; @@ -3105,6 +3178,55 @@ do_read: continue; } assert(written == to_write2); + total_bytes_written += written; + + if (bandwidth_bytes_sec == 0) + continue; + + clock_gettime(CLOCK_MONOTONIC, &now); + diff = timespec_sub(&start_time, &now); + write_target = ((bandwidth_bytes_sec * diff.tv_sec) + + ((bandwidth_bytes_sec * + diff.tv_nsec)/NSEC_PER_SEC)); + + if (write_target < total_bytes_written) { + unsigned long long excess; + struct timespec delay = { 0, 0 }; + + excess = total_bytes_written - write_target; + + if (excess == 0) + continue; + + delay.tv_sec = excess / bandwidth_bytes_sec; + delay.tv_nsec = (excess % bandwidth_bytes_sec) * + NSEC_PER_SEC / bandwidth_bytes_sec; + + do { + rc = clock_nanosleep(CLOCK_MONOTONIC, 0, + &delay, &delay); + } while (rc < 0 && errno == EINTR); + + if (rc < 0) { + llapi_error(LLAPI_MSG_ERROR, rc, + "errors: delay for bandwidth control failed: %s\n", + strerror(-rc)); + rc = 0; + } + } + + if (stats_interval_sec) { + clock_gettime(CLOCK_MONOTONIC, &now); + if ((total_bytes_written != end - start) && + (now.tv_sec >= last_bw_print.tv_sec + + stats_interval_sec)) { + stats_log(&now, &start_time, + total_bytes_read, + total_bytes_written, + write_estimation_bytes); + last_bw_print = now; + } + } } pos += bytes_read; } @@ -3118,6 +3240,14 @@ do_read: return rc; } + /* Output at least one log, regardless of stats_interval */ + if (stats_interval_sec) { + clock_gettime(CLOCK_MONOTONIC, &now); + stats_log(&now, &start_time, total_bytes_read, + total_bytes_written, + write_estimation_bytes); + } + /** * no fatal error happens, each lrc_synced tells whether the component * has been resync successfully (note: we'd reverse the value to @@ -3144,6 +3274,14 @@ do_read: return rc2; } +int llapi_mirror_resync_many(int fd, struct llapi_layout *layout, + struct llapi_resync_comp *comp_array, + int comp_size, uint64_t start, uint64_t end) +{ + return llapi_mirror_resync_many_params(fd, layout, comp_array, + comp_size, start, end, 0, 0); +} + enum llapi_layout_comp_sanity_error { LSE_OK, LSE_INCOMPLETE_MIRROR, -- 1.8.3.1