X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Flhsmtool_posix.c;h=fa9f009962a1cf3f0423b8b2873037c57c7048a0;hb=51828ff66904be2a56a8790c3c9ce562a7c8e2e6;hp=dd496f13bd3a96382c5da86355611326f4e12a64;hpb=6231637eeef0998c224c9b23d52da1397476f0b6;p=fs%2Flustre-release.git diff --git a/lustre/utils/lhsmtool_posix.c b/lustre/utils/lhsmtool_posix.c index dd496f1..fa9f009 100644 --- a/lustre/utils/lhsmtool_posix.c +++ b/lustre/utils/lhsmtool_posix.c @@ -23,6 +23,7 @@ * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies * alternatives * + * Copyright (c) 2013, 2016, Intel Corporation. */ /* HSM copytool program for POSIX filesystem-based HSM's. * @@ -33,12 +34,30 @@ * This particular tool can also import an existing HSM archive. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include #include -#include -#include -#include -#include +#include +#include + +#include +#include #include /* Progress reporting period */ @@ -50,11 +69,9 @@ #define ONE_MB 0x100000 -/* copytool uses a 32b bitmask field to register with kuc - * archive num = 0 => all - * archive num from 1 to 32 - */ -#define MAX_ARCHIVE_CNT (sizeof(__u32) * 8) +#ifndef NSEC_PER_SEC +# define NSEC_PER_SEC 1000000000UL +#endif enum ct_action { CA_IMPORT = 1, @@ -71,12 +88,14 @@ struct options { int o_verbose; int o_copy_xattrs; int o_archive_cnt; - int o_archive_id[MAX_ARCHIVE_CNT]; + int o_archive_id[LL_HSM_MAX_ARCHIVE]; int o_report_int; unsigned long long o_bandwidth; size_t o_chunk_size; enum ct_action o_action; + char *o_event_fifo; char *o_mnt; + int o_mnt_fd; char *o_hsm_root; char *o_src; /* for import, or rebind */ char *o_dst; /* for import, or rebind */ @@ -86,15 +105,17 @@ struct options { struct options opt = { .o_copy_attrs = 1, .o_shadow_tree = 1, - .o_verbose = LLAPI_MSG_WARN, + .o_verbose = LLAPI_MSG_INFO, .o_copy_xattrs = 1, .o_report_int = REPORT_INTERVAL_DEFAULT, .o_chunk_size = ONE_MB, }; -/* The LLAPI will hold an open FD on lustre for us. Additionally open one on - * the archive FS root to make sure it doesn't drop out from under us (and - * remind the admin to shutdown the copytool before unmounting). */ +/* hsm_copytool_private will hold an open FD on the lustre mount point + * for us. Additionally open one on the archive FS root to make sure + * it doesn't drop out from under us (and remind the admin to shutdown + * the copytool before unmounting). */ + static int arc_fd = -1; static int err_major; @@ -105,15 +126,34 @@ static char fs_name[MAX_OBD_NAME + 1]; static struct hsm_copytool_private *ctdata; +static inline double ct_now(void) +{ + struct timeval tv; -#define CT_ERROR(format, ...) \ - llapi_printf(LLAPI_MSG_ERROR, "%s: "format, cmd_name, ## __VA_ARGS__) -#define CT_DEBUG(format, ...) \ - llapi_printf(LLAPI_MSG_DEBUG, "%s: "format, cmd_name, ## __VA_ARGS__) -#define CT_WARN(format, ...) \ - llapi_printf(LLAPI_MSG_WARN, "%s: "format, cmd_name, ## __VA_ARGS__) -#define CT_TRACE(format, ...) \ - llapi_printf(LLAPI_MSG_INFO, "%s: "format, cmd_name, ## __VA_ARGS__) + gettimeofday(&tv, NULL); + + return tv.tv_sec + 0.000001 * tv.tv_usec; +} + +#define CT_ERROR(_rc, _format, ...) \ + llapi_error(LLAPI_MSG_ERROR, _rc, \ + "%f %s[%ld]: "_format, \ + ct_now(), cmd_name, syscall(SYS_gettid), ## __VA_ARGS__) + +#define CT_DEBUG(_format, ...) \ + llapi_error(LLAPI_MSG_DEBUG | LLAPI_MSG_NO_ERRNO, 0, \ + "%f %s[%ld]: "_format, \ + ct_now(), cmd_name, syscall(SYS_gettid), ## __VA_ARGS__) + +#define CT_WARN(_format, ...) \ + llapi_error(LLAPI_MSG_WARN | LLAPI_MSG_NO_ERRNO, 0, \ + "%f %s[%ld]: "_format, \ + ct_now(), cmd_name, syscall(SYS_gettid), ## __VA_ARGS__) + +#define CT_TRACE(_format, ...) \ + llapi_error(LLAPI_MSG_INFO | LLAPI_MSG_NO_ERRNO, 0, \ + "%f %s[%ld]: "_format, \ + ct_now(), cmd_name, syscall(SYS_gettid), ## __VA_ARGS__) static void usage(const char *name, int rc) { @@ -136,9 +176,10 @@ static void usage(const char *name, int rc) "into a Lustre filesystem.\n" " Usage:\n" " %s [options] --import \n" - " import an archived subtree at\n" - " (relative to hsm_root) into the Lustre filesystem at\n" - " (absolute)\n" + " import an archived subtree from\n" + " (FID or relative path to hsm_root) into the Lustre\n" + " filesystem at\n" + " (absolute path)\n" " %s [options] --rebind \n" " rebind an entry in the HSM to a new FID\n" " old FID the HSM entry is bound to\n" @@ -148,16 +189,19 @@ static void usage(const char *name, int rc) " each line of consists of \n" " %s [options] --max-sequence \n" " return the max fid sequence of archived files\n" - " -A, --archive <#> Archive number (repeatable)\n" - " -p, --hsm-root Target HSM mount point\n" - " -q, --quiet Produce less verbose output\n" - " -v, --verbose Produce more verbose output\n" - " -c, --chunk-size I/O size used during data copy\n" - " (unit can be used, default is MB)\n" - " --abort-on-error Abort operation on major error\n" - " --dry-run Don't run, just show what would be done\n" - " --bandwidth Limit I/O bandwidth (unit can be used\n," - " default is MB)\n", + " --abort-on-error Abort operation on major error\n" + " -A, --archive <#> Archive number (repeatable)\n" + " -b, --bandwidth Limit I/O bandwidth (unit can be used\n," + " default is MB)\n" + " --dry-run Don't run, just show what would be done\n" + " -c, --chunk-size I/O size used during data copy\n" + " (unit can be used, default is MB)\n" + " -f, --event-fifo Write events stream to fifo\n" + " -p, --hsm-root Target HSM mount point\n" + " -q, --quiet Produce less verbose output\n" + " -u, --update-interval Interval between progress reports sent\n" + " to Coordinator\n" + " -v, --verbose Produce more verbose output\n", cmd_name, cmd_name, cmd_name, cmd_name, cmd_name); exit(rc); @@ -173,6 +217,8 @@ static int ct_parseopts(int argc, char * const *argv) {"chunk-size", required_argument, NULL, 'c'}, {"chunk_size", required_argument, NULL, 'c'}, {"daemon", no_argument, &opt.o_daemonize, 1}, + {"event-fifo", required_argument, NULL, 'f'}, + {"event_fifo", required_argument, NULL, 'f'}, {"dry-run", no_argument, &opt.o_dry_run, 1}, {"help", no_argument, NULL, 'h'}, {"hsm-root", required_argument, NULL, 'p'}, @@ -188,24 +234,26 @@ static int ct_parseopts(int argc, char * const *argv) {"no_xattr", no_argument, &opt.o_copy_xattrs, 0}, {"quiet", no_argument, NULL, 'q'}, {"rebind", no_argument, NULL, 'r'}, - {"report", required_argument, &opt.o_report_int, 0}, + {"update-interval", required_argument, NULL, 'u'}, + {"update_interval", required_argument, NULL, 'u'}, {"verbose", no_argument, NULL, 'v'}, {0, 0, 0, 0} }; - int c; + int c, rc; unsigned long long value; unsigned long long unit; optind = 0; - while ((c = getopt_long(argc, argv, "A:b:c:hiMp:qrv", + while ((c = getopt_long(argc, argv, "A:b:c:f:hiMp:qru:v", long_opts, NULL)) != -1) { switch (c) { case 'A': - if ((opt.o_archive_cnt >= MAX_ARCHIVE_CNT) || - (atoi(optarg) >= MAX_ARCHIVE_CNT)) { - CT_ERROR("archive number must be less" - "than %lu\n", MAX_ARCHIVE_CNT); - return -E2BIG; + if ((opt.o_archive_cnt >= LL_HSM_MAX_ARCHIVE) || + (atoi(optarg) >= LL_HSM_MAX_ARCHIVE)) { + rc = -E2BIG; + CT_ERROR(rc, "archive number must be less" + "than %zu", LL_HSM_MAX_ARCHIVE); + return rc; } opt.o_archive_id[opt.o_archive_cnt] = atoi(optarg); opt.o_archive_cnt++; @@ -214,14 +262,19 @@ static int ct_parseopts(int argc, char * const *argv) case 'c': unit = ONE_MB; if (llapi_parse_size(optarg, &value, &unit, 0) < 0) { - CT_ERROR("bad value for -%c '%s'\n", c, optarg); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "bad value for -%c '%s'", c, + optarg); + return rc; } if (c == 'c') opt.o_chunk_size = value; else opt.o_bandwidth = value; break; + case 'f': + opt.o_event_fifo = optarg; + break; case 'h': usage(argv[0], 0); case 'i': @@ -239,14 +292,21 @@ static int ct_parseopts(int argc, char * const *argv) case 'r': opt.o_action = CA_REBIND; break; + case 'u': + opt.o_report_int = atoi(optarg); + if (opt.o_report_int < 0) { + rc = -EINVAL; + CT_ERROR(rc, "bad value for -%c '%s'", c, + optarg); + return rc; + } + break; case 'v': opt.o_verbose++; break; case 0: break; default: - CT_ERROR("unrecognized option '%s'\n", - argv[optind - 1]); return -EINVAL; } } @@ -255,8 +315,9 @@ static int ct_parseopts(int argc, char * const *argv) case CA_IMPORT: /* src dst mount_point */ if (argc != optind + 3) { - CT_ERROR("--import requires 2 arguments\n"); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "--import requires 2 arguments"); + return rc; } opt.o_src = argv[optind++]; opt.o_dst = argv[optind++]; @@ -270,8 +331,9 @@ static int ct_parseopts(int argc, char * const *argv) opt.o_src = argv[optind++]; opt.o_dst = argv[optind++]; } else { - CT_ERROR("--rebind requires 1 or 2 arguments\n"); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "--rebind requires 1 or 2 arguments"); + return rc; } break; case CA_MAXSEQ: @@ -281,29 +343,35 @@ static int ct_parseopts(int argc, char * const *argv) } if (argc != optind + 1) { - CT_ERROR("no mount point specified\n"); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "no mount point specified"); + return rc; } opt.o_mnt = argv[optind]; + opt.o_mnt_fd = -1; - CT_TRACE("action=%d src=%s dst=%s mount_point=%s\n", + CT_TRACE("action=%d src=%s dst=%s mount_point=%s", opt.o_action, opt.o_src, opt.o_dst, opt.o_mnt); - if (!opt.o_dry_run && opt.o_hsm_root == NULL) { - CT_ERROR("must specify a HSM root\n"); - return -EINVAL; + if (opt.o_hsm_root == NULL) { + rc = -EINVAL; + CT_ERROR(rc, "must specify a root directory for the backend"); + return rc; } if (opt.o_action == CA_IMPORT) { if (opt.o_src && opt.o_src[0] == '/') { - CT_ERROR("source path must be relative to HSM root.\n"); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, + "source path must be relative to HSM root"); + return rc; } if (opt.o_dst && opt.o_dst[0] != '/') { - CT_ERROR("destination path must be absolute.\n"); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "destination path must be absolute"); + return rc; } } @@ -317,6 +385,9 @@ static int ct_mkdir_p(const char *path) int rc; ptr = strdup(path); + if (ptr == NULL) + return -errno; + saved = ptr; while (*ptr == '/') ptr++; @@ -327,8 +398,7 @@ static int ct_mkdir_p(const char *path) *ptr = '/'; if (rc < 0 && errno != EEXIST) { rc = -errno; - CT_ERROR("'%s' mkdir failed (%s)\n", path, - strerror(-rc)); + CT_ERROR(rc, "cannot mkdir '%s'", path); free(saved); return rc; } @@ -350,14 +420,13 @@ static int ct_save_stripe(int src_fd, const char *src, const char *dst) ssize_t xattr_size; snprintf(lov_file, sizeof(lov_file), "%s.lov", dst); - CT_TRACE("saving stripe info of '%s' in %s\n", src, lov_file); + CT_TRACE("saving stripe info of '%s' in %s", src, lov_file); xattr_size = fgetxattr(src_fd, XATTR_LUSTRE_LOV, lov_buf, sizeof(lov_buf)); if (xattr_size < 0) { rc = -errno; - CT_ERROR("'%s' cannot get stripe info on (%s)\n", src, - strerror(-rc)); + CT_ERROR(rc, "cannot get stripe info on '%s'", src); return rc; } @@ -373,15 +442,15 @@ static int ct_save_stripe(int src_fd, const char *src, const char *dst) fd = open(lov_file, O_TRUNC | O_CREAT | O_WRONLY, FILE_PERM); if (fd < 0) { rc = -errno; - CT_ERROR("'%s' cannot open (%s)\n", lov_file, strerror(-rc)); + CT_ERROR(rc, "cannot open '%s'", lov_file); goto err_cleanup; } rc = write(fd, lum, xattr_size); if (rc < 0) { rc = -errno; - CT_ERROR("'%s' cannot write %zd bytes (%s)\n", - lov_file, xattr_size, strerror(-rc)); + CT_ERROR(rc, "cannot write %zd bytes to '%s'", + xattr_size, lov_file); close(fd); goto err_cleanup; } @@ -389,7 +458,7 @@ static int ct_save_stripe(int src_fd, const char *src, const char *dst) rc = close(fd); if (rc < 0) { rc = -errno; - CT_ERROR("'%s' cannot close (%s)\n", lov_file, strerror(-rc)); + CT_ERROR(rc, "cannot close '%s'", lov_file); goto err_cleanup; } @@ -408,18 +477,18 @@ static int ct_load_stripe(const char *src, void *lovea, size_t *lovea_size) int fd; snprintf(lov_file, sizeof(lov_file), "%s.lov", src); - CT_TRACE("reading stripe rules from '%s' for '%s'\n", lov_file, src); + CT_TRACE("reading stripe rules from '%s' for '%s'", lov_file, src); fd = open(lov_file, O_RDONLY); if (fd < 0) { - CT_ERROR("'%s' cannot open (%s)\n", lov_file, strerror(errno)); + CT_ERROR(errno, "cannot open '%s'", lov_file); return -ENODATA; } rc = read(fd, lovea, *lovea_size); if (rc < 0) { - CT_ERROR("'%s' cannot read %zu bytes (%s)\n", lov_file, - *lovea_size, strerror(errno)); + CT_ERROR(errno, "cannot read %zu bytes from '%s'", + *lovea_size, lov_file); close(fd); return -ENODATA; } @@ -438,226 +507,180 @@ static int ct_restore_stripe(const char *src, const char *dst, int dst_fd, rc = fsetxattr(dst_fd, XATTR_LUSTRE_LOV, lovea, lovea_size, XATTR_CREATE); if (rc < 0) { - CT_ERROR("'%s' cannot set striping (%s)\n", - dst, strerror(errno)); rc = -errno; + CT_ERROR(rc, "cannot set lov EA on '%s'", dst); } return rc; } -/* non-blocking read or write */ -static int nonblock_rw(bool wr, int fd, char *buf, int size) -{ - int rc; - - if (wr) - rc = write(fd, buf, size); - else - rc = read(fd, buf, size); - - if ((rc < 0) && (errno == -EAGAIN)) { - fd_set set; - struct timeval timeout; - - timeout.tv_sec = opt.o_report_int; - - FD_ZERO(&set); - FD_SET(fd, &set); - if (wr) - rc = select(FD_SETSIZE, NULL, &set, NULL, &timeout); - else - rc = select(FD_SETSIZE, &set, NULL, NULL, &timeout); - if (rc < 0) - return -errno; - if (rc == 0) - /* Timed out, we read nothing */ - return -EAGAIN; - - /* Should be available now */ - if (wr) - rc = write(fd, buf, size); - else - rc = read(fd, buf, size); - } - - if (rc < 0) - rc = -errno; - - return rc; -} - static int ct_copy_data(struct hsm_copyaction_private *hcp, const char *src, const char *dst, int src_fd, int dst_fd, const struct hsm_action_item *hai, long hal_flags) { struct hsm_extent he; + __u64 offset = hai->hai_extent.offset; struct stat src_st; struct stat dst_st; - char *buf; - __u64 wpos = 0; - __u64 rpos = 0; - __u64 rlen; - time_t last_print_time = time(0); - int rsize; - int wsize; - int bufoff = 0; + char *buf = NULL; + __u64 write_total = 0; + __u64 length = hai->hai_extent.length; + time_t last_report_time; int rc = 0; - - CT_TRACE("going to copy data from '%s' to %s\n", src, dst); - - buf = malloc(opt.o_chunk_size); - if (buf == NULL) - return -ENOMEM; + double start_ct_now = ct_now(); + /* Bandwidth Control */ + time_t start_time; + time_t now; + time_t last_bw_print; if (fstat(src_fd, &src_st) < 0) { - CT_ERROR("'%s' stat failed (%s)\n", src, strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot stat '%s'", src); + return rc; } if (!S_ISREG(src_st.st_mode)) { - CT_ERROR("'%s' not a regular file\n", src); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "'%s' is not a regular file", src); + return rc; } - rc = lseek(src_fd, hai->hai_extent.offset, SEEK_SET); - if (rc < 0) { - CT_ERROR("'%s' seek to read to "LPU64" (len %zu)" - " failed (%s)\n", - src, hai->hai_extent.offset, src_st.st_size, - strerror(errno)); - rc = -errno; - goto out; + if (hai->hai_extent.offset > (__u64)src_st.st_size) { + rc = -EINVAL; + CT_ERROR(rc, "Trying to start reading past end (%ju > " + "%jd) of '%s' source file", + (uintmax_t)hai->hai_extent.offset, + (intmax_t)src_st.st_size, src); + return rc; } if (fstat(dst_fd, &dst_st) < 0) { - CT_ERROR("'%s' stat failed (%s)\n", dst, strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot stat '%s'", dst); + return rc; } if (!S_ISREG(dst_st.st_mode)) { - CT_ERROR("'%s' not a regular file\n", dst); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "'%s' is not a regular file", dst); + return rc; } - rc = lseek(dst_fd, hai->hai_extent.offset, SEEK_SET); - if (rc < 0) { - CT_ERROR("'%s' seek to write to "LPU64" failed (%s)\n", src, - hai->hai_extent.offset, strerror(errno)); - rc = -errno; - goto out; - } + /* Don't read beyond a given extent */ + if (length > src_st.st_size - hai->hai_extent.offset) + length = src_st.st_size - hai->hai_extent.offset; - he.offset = hai->hai_extent.offset; + start_time = last_bw_print = last_report_time = time(NULL); + + he.offset = offset; he.length = 0; - rc = llapi_hsm_action_progress(hcp, &he, 0); + rc = llapi_hsm_action_progress(hcp, &he, length, 0); if (rc < 0) { /* Action has been canceled or something wrong * is happening. Stop copying data. */ - CT_ERROR("%s->'%s' progress returned err %d\n", src, dst, rc); + CT_ERROR(rc, "progress ioctl for copy '%s'->'%s' failed", + src, dst); goto out; } errno = 0; - /* Don't read beyond a given extent */ - rlen = (hai->hai_extent.length == -1LL) ? - src_st.st_size : hai->hai_extent.length; - - while (wpos < rlen) { - int chunk = (rlen - wpos > opt.o_chunk_size) ? - opt.o_chunk_size : rlen - wpos; - - /* Only read more if we wrote everything in the buffer */ - if (wpos == rpos) { - rsize = nonblock_rw(0, src_fd, buf, chunk); - if (rsize == 0) - /* EOF */ - break; - if (rsize == -EAGAIN) { - /* Timed out */ - rsize = 0; - if (rpos == 0) { - /* Haven't read anything yet, let's - * give it back to the coordinator - * for rescheduling */ - rc = -EAGAIN; - break; - } - } + buf = malloc(opt.o_chunk_size); + if (buf == NULL) { + rc = -ENOMEM; + goto out; + } - if (rsize < 0) { - CT_ERROR("'%s' read failed (%s)\n", src, - strerror(-rsize)); - rc = rsize; - break; - } + CT_TRACE("start copy of %ju bytes from '%s' to '%s'", + (uintmax_t)length, src, dst); - rpos += rsize; - bufoff = 0; - } + while (write_total < length) { + ssize_t rsize; + ssize_t wsize; + int chunk = (length - write_total > opt.o_chunk_size) ? + opt.o_chunk_size : length - write_total; - wsize = nonblock_rw(1, dst_fd, buf + bufoff, rpos - wpos); - if (wsize == -EAGAIN) - /* Timed out */ - wsize = 0; + rsize = pread(src_fd, buf, chunk, offset); + if (rsize == 0) + /* EOF */ + break; + if (rsize < 0) { + rc = -errno; + CT_ERROR(rc, "cannot read from '%s'", src); + break; + } + + wsize = pwrite(dst_fd, buf, rsize, offset); if (wsize < 0) { - CT_ERROR("'%s' write failed (%s)\n", dst, - strerror(-wsize)); - rc = wsize; + rc = -errno; + CT_ERROR(rc, "cannot write to '%s'", dst); break; } - wpos += wsize; - bufoff += wsize; + write_total += wsize; + offset += wsize; + now = time(NULL); + /* sleep if needed, to honor bandwidth limits */ if (opt.o_bandwidth != 0) { - static unsigned long long tot_bytes; - static time_t start_time, last_time; - time_t now = time(0); - double tot_time, excess; - unsigned int sleep_time; - - if (now > last_time + 5) { - tot_bytes = 0; - start_time = last_time = now; - } + unsigned long long write_theory; - tot_bytes += wsize; - tot_time = now - start_time; - if (tot_time < 1) - tot_time = 1; + write_theory = (now - start_time) * opt.o_bandwidth; - excess = tot_bytes - tot_time * opt.o_bandwidth; - sleep_time = excess * 1000000 / opt.o_bandwidth; - if ((now - start_time) % 10 == 1) - CT_TRACE("bandwith control: excess=%E" - " sleep for %dus\n", - excess, sleep_time); + if (write_theory < write_total) { + unsigned long long excess; + struct timespec delay; - if (excess > 0) - usleep(sleep_time); + excess = write_total - write_theory; - last_time = now; + delay.tv_sec = excess / opt.o_bandwidth; + delay.tv_nsec = (excess % opt.o_bandwidth) * + NSEC_PER_SEC / opt.o_bandwidth; + + if (now >= last_bw_print + opt.o_report_int) { + CT_TRACE("bandwith control: %lluB/s " + "excess=%llu sleep for " + "%lld.%09lds", + opt.o_bandwidth, excess, + (long long)delay.tv_sec, + delay.tv_nsec); + last_bw_print = now; + } + + do { + rc = nanosleep(&delay, &delay); + } while (rc < 0 && errno == EINTR); + if (rc < 0) { + CT_ERROR(errno, "delay for bandwidth " + "control failed to sleep: " + "residual=%lld.%09lds", + (long long)delay.tv_sec, + delay.tv_nsec); + rc = 0; + } + } } - if (time(0) >= last_print_time + opt.o_report_int) { - last_print_time = time(0); - CT_TRACE("%%"LPU64" ", 100 * wpos / rlen); - he.length = wpos; - rc = llapi_hsm_action_progress(hcp, &he, 0); + now = time(NULL); + if (now >= last_report_time + opt.o_report_int) { + last_report_time = now; + CT_TRACE("%%%ju ", (uintmax_t)(100 * write_total / length)); + /* only give the length of the write since the last + * progress report */ + he.length = offset - he.offset; + rc = llapi_hsm_action_progress(hcp, &he, length, 0); if (rc < 0) { /* Action has been canceled or something wrong * is happening. Stop copying data. */ - CT_ERROR("%s->'%s' progress returned err %d\n", - src, dst, rc); + CT_ERROR(rc, "progress ioctl for copy" + " '%s'->'%s' failed", src, dst); goto out; } + he.offset = offset; } rc = 0; } - CT_TRACE("\n"); out: /* @@ -674,23 +697,17 @@ out: rc = ftruncate(dst_fd, src_st.st_size); if (rc < 0) { rc = -errno; - CT_ERROR("'%s' final truncate to %lu failed (%s)\n", - dst, src_st.st_size, strerror(-rc)); + CT_ERROR(rc, "cannot truncate '%s' to size %jd", + dst, (intmax_t)src_st.st_size); err_major++; } } - if (rc == 0) { - rc = fsync(dst_fd); - if (rc < 0) { - rc = -errno; - CT_ERROR("'%s' fsync failed (%s)\n", dst, - strerror(-rc)); - err_major++; - } - } + if (buf != NULL) + free(buf); - free(buf); + CT_TRACE("copied %ju bytes in %f seconds", + (uintmax_t)length, ct_now() - start_ct_now); return rc; } @@ -701,11 +718,12 @@ static int ct_copy_attr(const char *src, const char *dst, int src_fd, { struct stat st; struct timeval times[2]; + int rc; if (fstat(src_fd, &st) < 0) { - CT_ERROR("'%s' stat failed (%s)\n", - src, strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot stat '%s'", src); + return rc; } times[0].tv_sec = st.st_atime; @@ -715,9 +733,9 @@ static int ct_copy_attr(const char *src, const char *dst, int src_fd, if (fchmod(dst_fd, st.st_mode) < 0 || fchown(dst_fd, st.st_uid, st.st_gid) < 0 || futimes(dst_fd, times) < 0) { - CT_ERROR("'%s' fchmod fchown or futimes failed (%s)\n", src, - strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot set attributes of '%s'", src); + return rc; } return 0; @@ -747,13 +765,15 @@ static int ct_copy_xattr(const char *src, const char *dst, int src_fd, (strncmp(XATTR_TRUSTED_PREFIX, name, sizeof(XATTR_TRUSTED_PREFIX) - 1) != 0)) { rc = fsetxattr(dst_fd, name, value, rc, 0); - CT_TRACE("'%s' fsetxattr of '%s' rc=%d (%s)\n", - dst, name, rc, strerror(errno)); + CT_TRACE("fsetxattr of '%s' on '%s' rc=%d (%s)", + name, dst, rc, strerror(errno)); /* lustre.* attrs aren't supported on other FS's */ if (rc < 0 && errno != EOPNOTSUPP) { - CT_ERROR("'%s' fsetxattr of '%s' failed (%s)\n", - dst, name, strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot set extended attribute" + " '%s' of '%s'", + name, dst); + return rc; } } name += strlen(name) + 1; @@ -799,7 +819,7 @@ static int ct_begin_restore(struct hsm_copyaction_private **phcp, false); if (rc < 0) { ct_path_lustre(src, sizeof(src), opt.o_mnt, &hai->hai_fid); - CT_ERROR("'%s' copy start failed (%s)\n", src, strerror(-rc)); + CT_ERROR(rc, "llapi_hsm_action_begin() on '%s' failed", src); } return rc; @@ -814,26 +834,39 @@ static int ct_begin(struct hsm_copyaction_private **phcp, } static int ct_fini(struct hsm_copyaction_private **phcp, - const struct hsm_action_item *hai, int flags, int ct_rc) + const struct hsm_action_item *hai, int hp_flags, int ct_rc) { - char lstr[PATH_MAX]; - int rc; + struct hsm_copyaction_private *hcp; + char lstr[PATH_MAX]; + int rc; CT_TRACE("Action completed, notifying coordinator " - "cookie="LPX64", FID="DFID", flags=%d err=%d\n", - hai->hai_cookie, PFID(&hai->hai_fid), - flags, -ct_rc); + "cookie=%#jx, FID="DFID", hp_flags=%d err=%d", + (uintmax_t)hai->hai_cookie, PFID(&hai->hai_fid), + hp_flags, -ct_rc); ct_path_lustre(lstr, sizeof(lstr), opt.o_mnt, &hai->hai_fid); - rc = llapi_hsm_action_end(phcp, &hai->hai_extent, flags, abs(ct_rc)); + + if (phcp == NULL || *phcp == NULL) { + rc = llapi_hsm_action_begin(&hcp, ctdata, hai, -1, 0, true); + if (rc < 0) { + CT_ERROR(rc, "llapi_hsm_action_begin() on '%s' failed", + lstr); + return rc; + } + phcp = &hcp; + } + + rc = llapi_hsm_action_end(phcp, &hai->hai_extent, hp_flags, abs(ct_rc)); if (rc == -ECANCELED) - CT_ERROR("'%s' completed action has been canceled: " - "cookie="LPX64", FID="DFID"\n", lstr, hai->hai_cookie, - PFID(&hai->hai_fid)); + CT_ERROR(rc, "completed action on '%s' has been canceled: " + "cookie=%#jx, FID="DFID, lstr, + (uintmax_t)hai->hai_cookie, PFID(&hai->hai_fid)); else if (rc < 0) - CT_ERROR("'%s' copy end failed (%s)\n", lstr, strerror(-rc)); + CT_ERROR(rc, "llapi_hsm_action_end() on '%s' failed", lstr); else - CT_TRACE("'%s' copy end ok (rc=%d)\n", lstr, rc); + CT_TRACE("llapi_hsm_action_end() on '%s' ok (rc=%d)", + lstr, rc); return rc; } @@ -842,11 +875,11 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) { struct hsm_copyaction_private *hcp = NULL; char src[PATH_MAX]; - char dst[PATH_MAX]; + char dst[PATH_MAX] = ""; int rc; int rcf = 0; bool rename_needed = false; - int ct_flags = 0; + int hp_flags = 0; int open_flags; int src_fd = -1; int dst_fd = -1; @@ -864,13 +897,13 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) if (hai->hai_extent.length == -1) { /* whole file, write it to tmp location and atomically * replace old archived file */ - strncat(dst, "_tmp", sizeof(dst) - strlen(dst) - 1); + strlcat(dst, "_tmp", sizeof(dst)); /* we cannot rely on the same test because ct_copy_data() * updates hai_extent.length */ rename_needed = true; } - CT_TRACE("'%s' archived to %s\n", src, dst); + CT_TRACE("archiving '%s' to '%s'", src, dst); if (opt.o_dry_run) { rc = 0; @@ -879,53 +912,59 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) rc = ct_mkdir_p(dst); if (rc < 0) { - CT_ERROR("'%s' mkdir_p failed (%s)\n", dst, strerror(-rc)); + CT_ERROR(rc, "mkdir_p '%s' failed", dst); goto fini_major; } - src_fd = open(src, O_RDONLY | O_NOATIME | O_NONBLOCK | O_NOFOLLOW); - if (src_fd == -1) { - CT_ERROR("'%s' open read failed (%s)\n", src, strerror(errno)); - rc = -errno; + src_fd = llapi_hsm_action_get_fd(hcp); + if (src_fd < 0) { + rc = src_fd; + CT_ERROR(rc, "cannot open '%s' for read", src); goto fini_major; } - open_flags = O_WRONLY | O_NOFOLLOW | O_NONBLOCK; + open_flags = O_WRONLY | O_NOFOLLOW; /* If extent is specified, don't truncate an old archived copy */ open_flags |= ((hai->hai_extent.length == -1) ? O_TRUNC : 0) | O_CREAT; dst_fd = open(dst, open_flags, FILE_PERM); if (dst_fd == -1) { - CT_ERROR("'%s' open write failed (%s)\n", dst, strerror(errno)); rc = -errno; + CT_ERROR(rc, "cannot open '%s' for write", dst); goto fini_major; } /* saving stripe is not critical */ rc = ct_save_stripe(src_fd, src, dst); if (rc < 0) - CT_ERROR("'%s' cannot save file striping info in '%s' (%s)\n", - src, dst, strerror(-rc)); + CT_ERROR(rc, "cannot save file striping info of '%s' in '%s'", + src, dst); rc = ct_copy_data(hcp, src, dst, src_fd, dst_fd, hai, hal_flags); if (rc < 0) { - CT_ERROR("'%s' data copy failed to '%s' (%s)\n", - src, dst, strerror(-rc)); + CT_ERROR(rc, "data copy failed from '%s' to '%s'", src, dst); goto fini_major; } - CT_TRACE("'%s' data archived to '%s' done\n", src, dst); + rc = fsync(dst_fd); + if (rc < 0) { + rc = -errno; + CT_ERROR(rc, "cannot flush '%s' archive file '%s'", src, dst); + goto fini_major; + } + + CT_TRACE("data archiving for '%s' to '%s' done", src, dst); /* attrs will remain on the MDS; no need to copy them, except possibly for disaster recovery */ if (opt.o_copy_attrs) { rc = ct_copy_attr(src, dst, src_fd, dst_fd); if (rc < 0) { - CT_ERROR("'%s' attr copy failed to '%s' (%s)\n", - src, dst, strerror(-rc)); + CT_ERROR(rc, "cannot copy attr of '%s' to '%s'", + src, dst); rcf = rc; } - CT_TRACE("'%s' attr file copied to archive '%s'\n", + CT_TRACE("attr file for '%s' saved to archive '%s'", src, dst); } @@ -934,11 +973,11 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) if (opt.o_copy_xattrs) { rc = ct_copy_xattr(src, dst, src_fd, dst_fd, false); if (rc < 0) { - CT_ERROR("'%s' xattr copy failed to '%s' (%s)\n", - src, dst, strerror(-rc)); + CT_ERROR(rc, "cannot copy xattr of '%s' to '%s'", + src, dst); rcf = rcf ? rcf : rc; } - CT_ERROR("'%s' xattr file copied to archive '%s'\n", + CT_TRACE("xattr file for '%s' saved to archive '%s'", src, dst); } @@ -951,9 +990,8 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) &hai->hai_fid); rc = rename(dst, src); if (rc < 0) { - CT_ERROR("'%s' renamed to '%s' failed (%s)\n", dst, src, - strerror(errno)); rc = -errno; + CT_ERROR(rc, "cannot rename '%s' to '%s'", dst, src); goto fini_major; } /* rename lov file */ @@ -961,8 +999,8 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) snprintf(tmp_dst, sizeof(tmp_dst), "%s.lov", dst); rc = rename(tmp_dst, tmp_src); if (rc < 0) - CT_ERROR("'%s' renamed to '%s' failed (%s)\n", - tmp_dst, tmp_src, strerror(errno)); + CT_ERROR(errno, "cannot rename '%s' to '%s'", + tmp_dst, tmp_src); } if (opt.o_shadow_tree) { @@ -977,7 +1015,7 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) int linkno = 0; char *ptr; int depth = 0; - int sz; + ssize_t sz; sprintf(buf, DFID, PFID(&hai->hai_fid)); sprintf(src, "%s/shadow/", opt.o_hsm_root); @@ -989,8 +1027,7 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) rc = llapi_fid2path(opt.o_mnt, buf, src + strlen(src), sizeof(src) - strlen(src), &recno, &linkno); if (rc < 0) { - CT_ERROR("'%s' fid2path failed (%s)\n", buf, - strerror(-rc)); + CT_ERROR(rc, "cannot get FID of '%s'", buf); rcf = rcf ? rcf : rc; goto fini_minor; } @@ -1006,41 +1043,46 @@ static int ct_archive(const struct hsm_action_item *hai, const long hal_flags) ct_path_archive(dst, sizeof(dst), buf, &hai->hai_fid); if (ct_mkdir_p(src)) { - CT_ERROR("'%s' mkdir_p failed (%s)\n", src, - strerror(errno)); + CT_ERROR(errno, "mkdir_p '%s' failed", src); rcf = rcf ? rcf : -errno; goto fini_minor; } /* symlink already exists ? */ sz = readlink(src, buf, sizeof(buf)); + /* detect truncation */ + if (sz == sizeof(buf)) { + rcf = rcf ? rcf : -E2BIG; + CT_ERROR(rcf, "readlink '%s' truncated", src); + goto fini_minor; + } if (sz >= 0) { buf[sz] = '\0'; if (sz == 0 || strncmp(buf, dst, sz) != 0) { if (unlink(src) && errno != ENOENT) { - CT_ERROR("'%s' unlink symlink failed " - "(%s)\n", src, - strerror(errno)); + CT_ERROR(errno, + "cannot unlink symlink '%s'", + src); rcf = rcf ? rcf : -errno; goto fini_minor; /* unlink old symlink done */ - CT_TRACE("'%s' remove old symlink pointing" - " to '%s'\n", src, buf); + CT_TRACE("remove old symlink '%s' pointing" + " to '%s'", src, buf); } } else { /* symlink already ok */ - CT_TRACE("'%s' symlink already pointing" - " to '%s'\n", src, dst); + CT_TRACE("symlink '%s' already pointing" + " to '%s'", src, dst); rcf = 0; goto fini_minor; } } if (symlink(dst, src)) { - CT_ERROR("'%s' symlink to '%s' failed (%s)\n", src, dst, - strerror(errno)); + CT_ERROR(errno, "cannot symlink '%s' to '%s'", + src, dst); rcf = rcf ? rcf : -errno; goto fini_minor; } - CT_TRACE("'%s' symlink to '%s' done\n", src, dst); + CT_TRACE("symlink '%s' to '%s' done", src, dst); } fini_minor: if (rcf) @@ -1053,7 +1095,7 @@ fini_major: unlink(dst); if (ct_is_retryable(rc)) - ct_flags |= HP_FLAG_RETRY; + hp_flags |= HP_FLAG_RETRY; rcf = rc; @@ -1064,8 +1106,7 @@ out: if (!(dst_fd < 0)) close(dst_fd); - if (hcp != NULL) - rc = ct_fini(&hcp, hai, ct_flags, rcf); + rc = ct_fini(&hcp, hai, hp_flags, rcf); return rc; } @@ -1078,14 +1119,13 @@ static int ct_restore(const struct hsm_action_item *hai, const long hal_flags) char lov_buf[XATTR_SIZE_MAX]; size_t lov_size = sizeof(lov_buf); int rc; - int flags = 0; + int hp_flags = 0; int src_fd = -1; int dst_fd = -1; - int mdt_index = -1; /* Not implemented */ + int mdt_index = -1; int open_flags = 0; bool set_lovea; - lustre_fid dfid; - + struct lu_fid dfid; /* we fill lustre so: * source = lustre FID in the backend * destination = data FID = volatile file @@ -1094,11 +1134,18 @@ static int ct_restore(const struct hsm_action_item *hai, const long hal_flags) /* build backend file name from released file FID */ ct_path_archive(src, sizeof(src), opt.o_hsm_root, &hai->hai_fid); + rc = llapi_get_mdt_index_by_fid(opt.o_mnt_fd, &hai->hai_fid, + &mdt_index); + if (rc < 0) { + CT_ERROR(rc, "cannot get mdt index "DFID"", + PFID(&hai->hai_fid)); + return rc; + } /* restore loads and sets the LOVEA w/o interpreting it to avoid * dependency on the structure format. */ rc = ct_load_stripe(src, lov_buf, &lov_size); if (rc < 0) { - CT_WARN("'%s' cannot get stripe rules (%s), use default\n", + CT_WARN("cannot get stripe rules for '%s' (%s), use default", src, strerror(-rc)); set_lovea = false; } else { @@ -1113,38 +1160,43 @@ static int ct_restore(const struct hsm_action_item *hai, const long hal_flags) /* get the FID of the volatile file */ rc = llapi_hsm_action_get_dfid(hcp, &dfid); if (rc < 0) { - CT_ERROR("restoring "DFID", cannot get FID of created volatile" - " file (%s)\n", PFID(&hai->hai_fid), strerror(-rc)); + CT_ERROR(rc, "restoring "DFID + ", cannot get FID of created volatile file", + PFID(&hai->hai_fid)); goto fini; } /* build volatile "file name", for messages */ snprintf(dst, sizeof(dst), "{VOLATILE}="DFID, PFID(&dfid)); - CT_TRACE("'%s' restore data to '%s'\n", src, dst); + CT_TRACE("restoring data from '%s' to '%s'", src, dst); if (opt.o_dry_run) { rc = 0; goto fini; } - src_fd = open(src, O_RDONLY | O_NOATIME | O_NONBLOCK | O_NOFOLLOW); + src_fd = open(src, O_RDONLY | O_NOATIME | O_NOFOLLOW); if (src_fd < 0) { - CT_ERROR("'%s' open for read failed (%s)\n", src, - strerror(errno)); rc = -errno; + CT_ERROR(rc, "cannot open '%s' for read", src); goto fini; } dst_fd = llapi_hsm_action_get_fd(hcp); + if (dst_fd < 0) { + rc = dst_fd; + CT_ERROR(rc, "cannot open '%s' for write", dst); + goto fini; + } if (set_lovea) { /* the layout cannot be allocated through .fid so we have to * restore a layout */ rc = ct_restore_stripe(src, dst, dst_fd, lov_buf, lov_size); if (rc < 0) { - CT_ERROR("'%s' cannot restore file striping info from " - "'%s' (%s)\n", dst, src, strerror(-rc)); + CT_ERROR(rc, "cannot restore file striping info" + " for '%s' from '%s'", dst, src); err_major++; goto fini; } @@ -1152,19 +1204,18 @@ static int ct_restore(const struct hsm_action_item *hai, const long hal_flags) rc = ct_copy_data(hcp, src, dst, src_fd, dst_fd, hai, hal_flags); if (rc < 0) { - CT_ERROR("'%s' data copy to '%s' failed (%s)\n", src, dst, - strerror(-rc)); + CT_ERROR(rc, "cannot copy data from '%s' to '%s'", + src, dst); err_major++; if (ct_is_retryable(rc)) - flags |= HP_FLAG_RETRY; + hp_flags |= HP_FLAG_RETRY; goto fini; } - CT_TRACE("'%s' data restore done to %s\n", src, dst); + CT_TRACE("data restore from '%s' to '%s' done", src, dst); fini: - if (hcp != NULL) - rc = ct_fini(&hcp, hai, flags, rc); + rc = ct_fini(&hcp, hai, hp_flags, rc); /* object swaping is done by cdt at copy end, so close of volatile file * cannot be done before */ @@ -1189,7 +1240,7 @@ static int ct_remove(const struct hsm_action_item *hai, const long hal_flags) ct_path_archive(dst, sizeof(dst), opt.o_hsm_root, &hai->hai_fid); - CT_TRACE("'%s' removed file\n", dst); + CT_TRACE("removing file '%s'", dst); if (opt.o_dry_run) { rc = 0; @@ -1199,38 +1250,22 @@ static int ct_remove(const struct hsm_action_item *hai, const long hal_flags) rc = unlink(dst); if (rc < 0) { rc = -errno; - CT_ERROR("'%s' unlink failed (%s)\n", dst, strerror(-rc)); + CT_ERROR(rc, "cannot unlink '%s'", dst); err_minor++; goto fini; } - strncat(dst, ".lov", sizeof(dst) - strlen(dst) - 1); + strlcat(dst, ".lov", sizeof(dst)); rc = unlink(dst); if (rc < 0) { rc = -errno; - CT_ERROR("'%s' unlink failed (%s)\n", dst, strerror(-rc)); + CT_ERROR(rc, "cannot unlink '%s'", dst); err_minor++; goto fini; } fini: - if (hcp != NULL) - rc = ct_fini(&hcp, hai, 0, rc); - - return rc; -} - -static int ct_report_error(const struct hsm_action_item *hai, int flags, - int errval) -{ - struct hsm_copyaction_private *hcp; - int rc; - - rc = llapi_hsm_action_begin(&hcp, ctdata, hai, -1, 0, true); - if (rc < 0) - return rc; - - rc = llapi_hsm_action_end(&hcp, &hai->hai_extent, flags, abs(errval)); + rc = ct_fini(&hcp, hai, 0, rc); return rc; } @@ -1247,16 +1282,15 @@ static int ct_process_item(struct hsm_action_item *hai, const long hal_flags) int linkno = 0; sprintf(fid, DFID, PFID(&hai->hai_fid)); - CT_TRACE("'%s' action %s reclen %d, cookie="LPX64"\n", + CT_TRACE("'%s' action %s reclen %d, cookie=%#jx", fid, hsm_copytool_action2name(hai->hai_action), - hai->hai_len, hai->hai_cookie); + hai->hai_len, (uintmax_t)hai->hai_cookie); rc = llapi_fid2path(opt.o_mnt, fid, path, sizeof(path), &recno, &linkno); if (rc < 0) - CT_ERROR("'%s' fid2path failed (%s)\n", fid, - strerror(-rc)); + CT_ERROR(rc, "cannot get path of FID %s", fid); else - CT_TRACE("'%s' processing file\n", path); + CT_TRACE("processing file '%s'", path); } switch (hai->hai_action) { @@ -1271,7 +1305,8 @@ static int ct_process_item(struct hsm_action_item *hai, const long hal_flags) rc = ct_remove(hai, hal_flags); break; case HSMA_CANCEL: - CT_TRACE("'%s' cancel not implemented\n", opt.o_mnt); + CT_TRACE("cancel not implemented for file system '%s'", + opt.o_mnt); /* Don't report progress to coordinator for this cookie: * the copy function will get ECANCELED when reporting * progress. */ @@ -1279,10 +1314,11 @@ static int ct_process_item(struct hsm_action_item *hai, const long hal_flags) return 0; break; default: - CT_ERROR("'%s' unknown action %d\n", opt.o_mnt, - hai->hai_action); + rc = -EINVAL; + CT_ERROR(rc, "unknown action %d, on '%s'", hai->hai_action, + opt.o_mnt); err_minor++; - ct_report_error(hai, 0, -EINVAL); + ct_fini(NULL, hai, 0, rc); } return 0; @@ -1328,8 +1364,8 @@ static int ct_process_item_async(const struct hsm_action_item *hai, rc = pthread_attr_init(&attr); if (rc != 0) { - CT_ERROR("'%s' pthread_attr_init: %s\n", opt.o_mnt, - strerror(rc)); + CT_ERROR(rc, "pthread_attr_init failed for '%s' service", + opt.o_mnt); free(data->hai); free(data); return -rc; @@ -1339,7 +1375,8 @@ static int ct_process_item_async(const struct hsm_action_item *hai, rc = pthread_create(&thread, &attr, ct_thread, data); if (rc != 0) - CT_ERROR("'%s' thread create: (%s)\n", opt.o_mnt, strerror(rc)); + CT_ERROR(rc, "cannot create thread for '%s' service", + opt.o_mnt); pthread_attr_destroy(&attr); return 0; @@ -1352,11 +1389,12 @@ static int ct_import_one(const char *src, const char *dst) struct stat st; int rc; - CT_TRACE("'%s' importing from %s\n", dst, src); + CT_TRACE("importing '%s' from '%s'", dst, src); if (stat(src, &st) < 0) { - CT_ERROR("'%s' stat failed (%s)\n", src, strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot stat '%s'", src); + return rc; } if (opt.o_dry_run) @@ -1366,16 +1404,15 @@ static int ct_import_one(const char *src, const char *dst) opt.o_archive_cnt ? opt.o_archive_id[0] : 0, &st, 0, 0, 0, 0, NULL, &fid); if (rc < 0) { - CT_ERROR("'%s' import from '%s' failed (%s)\n", dst, src, - strerror(-rc)); - return -rc; + CT_ERROR(rc, "cannot import '%s' from '%s'", dst, src); + return rc; } ct_path_archive(newarc, sizeof(newarc), opt.o_hsm_root, &fid); rc = ct_mkdir_p(newarc); if (rc < 0) { - CT_ERROR("'%s' mkdir_p failed (%s)\n", newarc, strerror(-rc)); + CT_ERROR(rc, "mkdir_p '%s' failed", newarc); err_major++; return rc; @@ -1384,12 +1421,12 @@ static int ct_import_one(const char *src, const char *dst) /* Lots of choices now: mv, ln, ln -s ? */ rc = link(src, newarc); /* hardlink */ if (rc < 0) { - CT_ERROR("'%s' link to '%s' failed (%s)\n", newarc, src, - strerror(errno)); + rc = -errno; + CT_ERROR(rc, "cannot link '%s' to '%s'", newarc, src); err_major++; - return -errno; + return rc; } - CT_TRACE("'%s' imported from '%s'=='%s'\n", dst, newarc, src); + CT_TRACE("imported '%s' from '%s'=='%s'", dst, newarc, src); return 0; } @@ -1397,29 +1434,53 @@ static int ct_import_one(const char *src, const char *dst) static char *path_concat(const char *dirname, const char *basename) { char *result; - int dirlen = strlen(dirname); + int rc; - result = malloc(dirlen + strlen(basename) + 2); - if (result == NULL) + rc = asprintf(&result, "%s/%s", dirname, basename); + if (rc < 0) return NULL; - memcpy(result, dirname, dirlen); - result[dirlen] = '/'; - strcpy(result + dirlen + 1, basename); - return result; } +static int ct_import_fid(const lustre_fid *import_fid) +{ + char fid_path[PATH_MAX]; + int rc; + + ct_path_lustre(fid_path, sizeof(fid_path), opt.o_mnt, import_fid); + rc = access(fid_path, F_OK); + if (rc == 0 || errno != ENOENT) { + rc = (errno == 0) ? -EEXIST : -errno; + CT_ERROR(rc, "cannot import '"DFID"'", PFID(import_fid)); + return rc; + } + + ct_path_archive(fid_path, sizeof(fid_path), opt.o_hsm_root, + import_fid); + + CT_TRACE("Resolving "DFID" to %s", PFID(import_fid), fid_path); + + return ct_import_one(fid_path, opt.o_dst); +} + static int ct_import_recurse(const char *relpath) { DIR *dir; struct dirent ent, *cookie = NULL; char *srcpath, *newpath; + lustre_fid import_fid; int rc; if (relpath == NULL) return -EINVAL; + /* Is relpath a FID? In which case SFID should expand to three + * elements. */ + rc = sscanf(relpath, SFID, RFID(&import_fid)); + if (rc == 3) + return ct_import_fid(&import_fid); + srcpath = path_concat(opt.o_hsm_root, relpath); if (srcpath == NULL) { err_major++; @@ -1434,10 +1495,9 @@ static int ct_import_recurse(const char *relpath) final location. */ rc = ct_import_one(srcpath, opt.o_dst); } else { - CT_ERROR("'%s' opendir failed (%s)\n", srcpath, - strerror(errno)); - err_major++; rc = -errno; + CT_ERROR(rc, "cannot opendir '%s'", srcpath); + err_major++; } free(srcpath); return rc; @@ -1447,10 +1507,9 @@ static int ct_import_recurse(const char *relpath) while (1) { rc = readdir_r(dir, &ent, &cookie); if (rc != 0) { - CT_ERROR("'%s' readdir_r failed (%s)\n", relpath, - strerror(errno)); - err_major++; rc = -errno; + CT_ERROR(rc, "cannot readdir_r '%s'", relpath); + err_major++; goto out; } else if ((rc == 0) && (cookie == NULL)) { /* end of directory */ @@ -1483,14 +1542,13 @@ static int ct_import_recurse(const char *relpath) /* Import the file */ rc = ct_import_one(src, dst); } else { - CT_ERROR("'%s' ct_mkdir_p failed (%s)\n", dst, - strerror(-rc)); + CT_ERROR(rc, "ct_mkdir_p '%s' failed", dst); err_major++; } } if (rc != 0) { - CT_ERROR("'%s' importing failed\n", newpath); + CT_ERROR(rc, "cannot import '%s'", newpath); if (err_major && opt.o_abort_on_error) { free(newpath); goto out; @@ -1507,10 +1565,11 @@ out: static int ct_rebind_one(const lustre_fid *old_fid, const lustre_fid *new_fid) { - char src[PATH_MAX]; - char dst[PATH_MAX]; + char src[PATH_MAX]; + char dst[PATH_MAX]; + int rc; - CT_TRACE("rebind "DFID" to "DFID"\n", PFID(old_fid), PFID(new_fid)); + CT_TRACE("rebind "DFID" to "DFID, PFID(old_fid), PFID(new_fid)); ct_path_archive(src, sizeof(src), opt.o_hsm_root, old_fid); ct_path_archive(dst, sizeof(dst), opt.o_hsm_root, new_fid); @@ -1518,16 +1577,15 @@ static int ct_rebind_one(const lustre_fid *old_fid, const lustre_fid *new_fid) if (!opt.o_dry_run) { ct_mkdir_p(dst); if (rename(src, dst)) { - CT_ERROR("'%s' rename to '%s' failed (%s)\n", src, dst, - strerror(errno)); + rc = -errno; + CT_ERROR(rc, "cannot rename '%s' to '%s'", src, dst); return -errno; } /* rename lov file */ - strncat(src, ".lov", sizeof(src) - strlen(src) - 1); - strncat(dst, ".lov", sizeof(dst) - strlen(dst) - 1); + strlcat(src, ".lov", sizeof(src)); + strlcat(dst, ".lov", sizeof(dst)); if (rename(src, dst)) - CT_ERROR("'%s' rename to '%s' failed (%s)\n", src, dst, - strerror(errno)); + CT_ERROR(errno, "cannot rename '%s' to '%s'", src, dst); } return 0; @@ -1566,8 +1624,9 @@ static int ct_rebind_list(const char *list) filp = fopen(list, "r"); if (filp == NULL) { - CT_ERROR("'%s' open failed (%s)\n", list, strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot open '%s'", list); + return rc; } /* each line consists of 2 FID */ @@ -1584,7 +1643,8 @@ static int ct_rebind_list(const char *list) rc = sscanf(line, SFID" "SFID, RFID(&old_fid), RFID(&new_fid)); if (rc != 6 || !fid_is_file(&old_fid) || !fid_is_file(&new_fid)) { - CT_ERROR("'%s' FID expected near '%s', line %u\n", + CT_ERROR(EINVAL, + "'%s' FID expected near '%s', line %u", list, line, nl); err_major++; continue; @@ -1601,8 +1661,8 @@ static int ct_rebind_list(const char *list) if (line) free(line); - /* return 0 if all rebinds were sucessful */ - CT_TRACE("'%s' %u lines read, %u rebind successful\n", list, nl, ok); + /* return 0 if all rebinds were successful */ + CT_TRACE("%u lines read from '%s', %u rebind successful", nl, list, ok); return ok == nl ? 0 : -1; } @@ -1617,14 +1677,16 @@ static int ct_rebind(void) if (sscanf(opt.o_src, SFID, RFID(&old_fid)) != 3 || !fid_is_file(&old_fid)) { - CT_ERROR("'%s' invalid FID format\n", opt.o_src); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "'%s' invalid FID format", opt.o_src); + return rc; } if (sscanf(opt.o_dst, SFID, RFID(&new_fid)) != 3 || !fid_is_file(&new_fid)) { - CT_ERROR("'%s' invalid FID format\n", opt.o_dst); - return -EINVAL; + rc = -EINVAL; + CT_ERROR(rc, "'%s' invalid FID format", opt.o_dst); + return rc; } rc = ct_rebind_one(&old_fid, &new_fid); @@ -1650,8 +1712,7 @@ static int ct_dir_level_max(const char *dirpath, __u16 *sub_seqmax) dir = opendir(dirpath); if (dir == NULL) { rc = -errno; - CT_ERROR("'%s' failed to open directory (%s)\n", opt.o_hsm_root, - strerror(-rc)); + CT_ERROR(rc, "cannot open directory '%s'", opt.o_hsm_root); return rc; } @@ -1665,15 +1726,15 @@ static int ct_dir_level_max(const char *dirpath, __u16 *sub_seqmax) continue; if (sscanf(ent.d_name, "%hx", &sub_seq) != 1) { - CT_TRACE("'%s' unexpected dirname format, " - "skip entry.\n", ent.d_name); + CT_TRACE("'%s' has an unexpected dirname format, " + "skip entry", ent.d_name); continue; } if (sub_seq > *sub_seqmax) *sub_seqmax = sub_seq; } rc = -errno; - CT_ERROR("'%s' readdir_r failed (%s)\n", dirpath, strerror(-rc)); + CT_ERROR(rc, "cannot readdir_r '%s'", dirpath); out: closedir(dir); @@ -1682,24 +1743,31 @@ out: static int ct_max_sequence(void) { - int rc, i; - char path[PATH_MAX]; - __u64 seq = 0; - __u16 subseq; + int rc, i; + char path[PATH_MAX]; + __u64 seq = 0; + __u16 subseq; - strncpy(path, opt.o_hsm_root, sizeof(path)); + strlcpy(path, opt.o_hsm_root, sizeof(path)); /* FID sequence is stored in top-level directory names: * hsm_root/16bits (high weight)/16 bits/16 bits/16 bits (low weight). */ for (i = 0; i < 4; i++) { + size_t path_len; + rc = ct_dir_level_max(path, &subseq); if (rc != 0) return rc; seq |= ((__u64)subseq << ((3 - i) * 16)); - sprintf(path + strlen(path), "/%04x", subseq); + path_len = strlen(path); + rc = snprintf(path + path_len, sizeof(path) - path_len, + "/%04x", subseq); + if (rc >= (sizeof(path) - path_len)) + return -E2BIG; + path[sizeof(path) - 1] = '\0'; } - printf("max_sequence: %016Lx\n", seq); + printf("max_sequence: %#jx\n", (uintmax_t)seq); return 0; } @@ -1712,50 +1780,67 @@ static void handler(int signal) * does successfully unmount and the mount is actually gone, but the * mtab entry remains. So this just makes mtab happier. */ llapi_hsm_copytool_unregister(&ctdata); + + /* Also remove fifo upon signal as during normal/error exit */ + if (opt.o_event_fifo != NULL) + llapi_hsm_unregister_event_fifo(opt.o_event_fifo); _exit(1); } /* Daemon waits for messages from the kernel; run it in the background. */ static int ct_run(void) { - int rc; + struct sigaction cleanup_sigaction; + int rc; if (opt.o_daemonize) { rc = daemon(1, 1); if (rc < 0) { - CT_ERROR("%d: cannot start as daemon (%s)", getpid(), - strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot daemonize"); + return rc; } } - rc = llapi_hsm_copytool_register(&ctdata, opt.o_mnt, 0, - opt.o_archive_cnt, opt.o_archive_id); + setbuf(stdout, NULL); + + if (opt.o_event_fifo != NULL) { + rc = llapi_hsm_register_event_fifo(opt.o_event_fifo); + if (rc < 0) { + CT_ERROR(rc, "failed to register event fifo"); + return rc; + } + llapi_error_callback_set(llapi_hsm_log_error); + } + + rc = llapi_hsm_copytool_register(&ctdata, opt.o_mnt, + opt.o_archive_cnt, + opt.o_archive_id, 0); if (rc < 0) { - CT_ERROR("%d: cannot start copytool interface: %s\n", getpid(), - strerror(-rc)); + CT_ERROR(rc, "cannot start copytool interface"); return rc; } - signal(SIGINT, handler); - signal(SIGTERM, handler); + memset(&cleanup_sigaction, 0, sizeof(cleanup_sigaction)); + cleanup_sigaction.sa_handler = handler; + sigemptyset(&cleanup_sigaction.sa_mask); + sigaction(SIGINT, &cleanup_sigaction, NULL); + sigaction(SIGTERM, &cleanup_sigaction, NULL); while (1) { - struct hsm_action_list *hal; - struct hsm_action_item *hai; - int msgsize; - int i = 0; + struct hsm_action_list *hal; + struct hsm_action_item *hai; + int msgsize; + int i = 0; - CT_TRACE("%d: waiting for message from kernel\n", getpid()); + CT_TRACE("waiting for message from kernel"); rc = llapi_hsm_copytool_recv(ctdata, &hal, &msgsize); if (rc == -ESHUTDOWN) { - CT_TRACE("%d: shutting down", getpid()); + CT_TRACE("shutting down"); break; - } else if (rc == -EAGAIN) { - continue; /* msg not for us */ } else if (rc < 0) { - CT_WARN("%d: message receive: (%s)\n", getpid(), + CT_WARN("cannot receive action list: %s", strerror(-rc)); err_major++; if (opt.o_abort_on_error) @@ -1764,12 +1849,12 @@ static int ct_run(void) continue; } - CT_TRACE("%d: copytool fs=%s archive#=%d item_count=%d\n", - getpid(), hal->hal_fsname, hal->hal_archive_id, - hal->hal_count); + CT_TRACE("copytool fs=%s archive#=%d item_count=%d", + hal->hal_fsname, hal->hal_archive_id, hal->hal_count); if (strcmp(hal->hal_fsname, fs_name) != 0) { - CT_ERROR("'%s' invalid fs name, expecting: %s\n", + rc = -EINVAL; + CT_ERROR(rc, "'%s' invalid fs name, expecting: %s", hal->hal_fsname, fs_name); err_major++; if (opt.o_abort_on_error) @@ -1781,28 +1866,29 @@ static int ct_run(void) hai = hai_first(hal); while (++i <= hal->hal_count) { if ((char *)hai - (char *)hal > msgsize) { - CT_ERROR("'%s' item %d past end of message!\n", + rc = -EPROTO; + CT_ERROR(rc, + "'%s' item %d past end of message!", opt.o_mnt, i); err_major++; - rc = -EPROTO; break; } rc = ct_process_item_async(hai, hal->hal_flags); if (rc < 0) - CT_ERROR("'%s' item %d process err: %s\n", - opt.o_mnt, i, strerror(-rc)); + CT_ERROR(rc, "'%s' item %d process", + opt.o_mnt, i); if (opt.o_abort_on_error && err_major) break; hai = hai_next(hai); } - llapi_hsm_action_list_free(&hal); - if (opt.o_abort_on_error && err_major) break; } llapi_hsm_copytool_unregister(&ctdata); + if (opt.o_event_fifo != NULL) + llapi_hsm_unregister_event_fifo(opt.o_event_fifo); return rc; } @@ -1814,18 +1900,26 @@ static int ct_setup(void) /* set llapi message level */ llapi_msg_set_level(opt.o_verbose); - arc_fd = open(opt.o_hsm_root, O_DIRECTORY); + arc_fd = open(opt.o_hsm_root, O_RDONLY); if (arc_fd < 0) { - CT_ERROR("cannot open archive at '%s': %s\n", opt.o_hsm_root, - strerror(errno)); - return -errno; + rc = -errno; + CT_ERROR(rc, "cannot open archive at '%s'", opt.o_hsm_root); + return rc; } rc = llapi_search_fsname(opt.o_mnt, fs_name); if (rc < 0) { - CT_ERROR("cannot find a Lustre filesystem mounted at: %s\n", + CT_ERROR(rc, "cannot find a Lustre filesystem mounted at '%s'", opt.o_mnt); - return -rc; + return rc; + } + + opt.o_mnt_fd = open(opt.o_mnt, O_RDONLY); + if (opt.o_mnt_fd < 0) { + rc = -errno; + CT_ERROR(rc, "cannot open mount point at '%s'", + opt.o_mnt); + return rc; } return rc; @@ -1833,12 +1927,24 @@ static int ct_setup(void) static int ct_cleanup(void) { - if (arc_fd < 0) - return 0; + int rc; - if (close(arc_fd) < 0) { - CT_ERROR("cannot close archive: %s.\n", strerror(errno)); - return -errno; + if (opt.o_mnt_fd >= 0) { + rc = close(opt.o_mnt_fd); + if (rc < 0) { + rc = -errno; + CT_ERROR(rc, "cannot close mount point"); + return rc; + } + } + + if (arc_fd >= 0) { + rc = close(arc_fd); + if (rc < 0) { + rc = -errno; + CT_ERROR(rc, "cannot close archive root directory"); + return rc; + } } return 0; @@ -1848,14 +1954,16 @@ int main(int argc, char **argv) { int rc; - strncpy(cmd_name, basename(argv[0]), sizeof(cmd_name)); + strlcpy(cmd_name, basename(argv[0]), sizeof(cmd_name)); rc = ct_parseopts(argc, argv); if (rc < 0) { - CT_ERROR("try '%s --help' for more information.\n", cmd_name); + CT_WARN("try '%s --help' for more information", cmd_name); return -rc; } - ct_setup(); + rc = ct_setup(); + if (rc < 0) + goto error_cleanup; switch (opt.o_action) { case CA_IMPORT: @@ -1873,10 +1981,11 @@ int main(int argc, char **argv) } if (opt.o_action != CA_MAXSEQ) - CT_TRACE("%s(%d) finished, errs: %d major, %d minor, " - "rc=%d (%s)\n", argv[0], getpid(), err_major, - err_minor, rc, strerror(-rc)); + CT_TRACE("process finished, errs: %d major, %d minor," + " rc=%d (%s)", err_major, err_minor, rc, + strerror(-rc)); +error_cleanup: ct_cleanup(); return -rc;