X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fliblustreapi_hsm.c;h=8d9938bfb8d69309aef4b32fa09e76e49c062723;hp=751680dc784dd593db995acb66026c6b178f53f3;hb=e39d6451efb1d05ce7bb62eb0a91aebe7af302d9;hpb=42a2e7cff98f2d9b5569ad4f3b6671cfca0ba592 diff --git a/lustre/utils/liblustreapi_hsm.c b/lustre/utils/liblustreapi_hsm.c index 751680d..8d9938b 100644 --- a/lustre/utils/liblustreapi_hsm.c +++ b/lustre/utils/liblustreapi_hsm.c @@ -6,6 +6,8 @@ * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies * alternatives * + * Copyright (c) 2013, 2017, Intel Corporation. + * * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser General Public License * (LGPL) version 2.1 or (at your discretion) any later version. @@ -30,6 +32,7 @@ * Author: Henri Doreau */ +#include #include #include #include @@ -41,54 +44,636 @@ #include #include #include +#include #include +#include #include #include #include -#include +#include #ifdef HAVE_LINUX_UNISTD_H #include #else #include #endif -#include -#include -#include -#include +#include #include #include "lustreapi_internal.h" +#define OPEN_BY_FID_PATH dot_lustre_name"/fid" + /****** HSM Copytool API ********/ #define CT_PRIV_MAGIC 0xC0BE2001 struct hsm_copytool_private { - int magic; - char *mnt; - int mnt_fd; - lustre_kernelcomm kuc; - __u32 archives; + int magic; + char *mnt; + struct kuc_hdr *kuch; + int mnt_fd; + int open_by_fid_fd; + struct lustre_kernelcomm *kuc; }; #define CP_PRIV_MAGIC 0x19880429 struct hsm_copyaction_private { __u32 magic; + __u32 source_fd; __s32 data_fd; const struct hsm_copytool_private *ct_priv; struct hsm_copy copy; + lstatx_t statx; +}; + +enum ct_progress_type { + CT_START = 0, + CT_RUNNING = 50, + CT_FINISH = 100, + CT_CANCEL = 150, + CT_ERROR = 175 +}; + +enum ct_event { + CT_REGISTER = 1, + CT_UNREGISTER = 2, + CT_ARCHIVE_START = HSMA_ARCHIVE, + CT_ARCHIVE_RUNNING = HSMA_ARCHIVE + CT_RUNNING, + CT_ARCHIVE_FINISH = HSMA_ARCHIVE + CT_FINISH, + CT_ARCHIVE_CANCEL = HSMA_ARCHIVE + CT_CANCEL, + CT_ARCHIVE_ERROR = HSMA_ARCHIVE + CT_ERROR, + CT_RESTORE_START = HSMA_RESTORE, + CT_RESTORE_RUNNING = HSMA_RESTORE + CT_RUNNING, + CT_RESTORE_FINISH = HSMA_RESTORE + CT_FINISH, + CT_RESTORE_CANCEL = HSMA_RESTORE + CT_CANCEL, + CT_RESTORE_ERROR = HSMA_RESTORE + CT_ERROR, + CT_REMOVE_START = HSMA_REMOVE, + CT_REMOVE_RUNNING = HSMA_REMOVE + CT_RUNNING, + CT_REMOVE_FINISH = HSMA_REMOVE + CT_FINISH, + CT_REMOVE_CANCEL = HSMA_REMOVE + CT_CANCEL, + CT_REMOVE_ERROR = HSMA_REMOVE + CT_ERROR, + CT_EVENT_MAX }; -#include +/* initialized in llapi_hsm_register_event_fifo() */ +static int llapi_hsm_event_fd = -1; +static bool created_hsm_event_fifo; + +static inline const char *llapi_hsm_ct_ev2str(int type) +{ + switch (type) { + case CT_REGISTER: + return "REGISTER"; + case CT_UNREGISTER: + return "UNREGISTER"; + case CT_ARCHIVE_START: + return "ARCHIVE_START"; + case CT_ARCHIVE_RUNNING: + return "ARCHIVE_RUNNING"; + case CT_ARCHIVE_FINISH: + return "ARCHIVE_FINISH"; + case CT_ARCHIVE_CANCEL: + return "ARCHIVE_CANCEL"; + case CT_ARCHIVE_ERROR: + return "ARCHIVE_ERROR"; + case CT_RESTORE_START: + return "RESTORE_START"; + case CT_RESTORE_RUNNING: + return "RESTORE_RUNNING"; + case CT_RESTORE_FINISH: + return "RESTORE_FINISH"; + case CT_RESTORE_CANCEL: + return "RESTORE_CANCEL"; + case CT_RESTORE_ERROR: + return "RESTORE_ERROR"; + case CT_REMOVE_START: + return "REMOVE_START"; + case CT_REMOVE_RUNNING: + return "REMOVE_RUNNING"; + case CT_REMOVE_FINISH: + return "REMOVE_FINISH"; + case CT_REMOVE_CANCEL: + return "REMOVE_CANCEL"; + case CT_REMOVE_ERROR: + return "REMOVE_ERROR"; + default: + llapi_err_noerrno(LLAPI_MSG_ERROR, + "Unknown event type: %d", type); + return NULL; + } +} + +/** + * Writes a JSON event to the monitor FIFO. Noop if no FIFO has been + * registered. + * + * \param event A list of llapi_json_items comprising a + * single JSON-formatted event. + * + * \retval 0 on success. + * \retval -errno on error. + */ +static int llapi_hsm_write_json_event(struct llapi_json_item_list **event) +{ + int rc; + char time_string[40]; + char json_buf[PIPE_BUF]; + FILE *buf_file; + time_t event_time = time(0); + struct tm time_components; + struct llapi_json_item_list *json_items; + + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) + return 0; + + if (event == NULL || *event == NULL) + return -EINVAL; + + json_items = *event; + + localtime_r(&event_time, &time_components); + + if (strftime(time_string, sizeof(time_string), "%Y-%m-%d %T %z", + &time_components) == 0) { + rc = -EINVAL; + llapi_error(LLAPI_MSG_ERROR, rc, "strftime() failed"); + return rc; + } + + rc = llapi_json_add_item(&json_items, "event_time", LLAPI_JSON_STRING, + time_string); + if (rc < 0) { + llapi_error(LLAPI_MSG_ERROR, -rc, "error in " + "llapi_json_add_item()"); + return rc; + } + + buf_file = fmemopen(json_buf, sizeof(json_buf), "w"); + if (buf_file == NULL) + return -errno; + + rc = llapi_json_write_list(event, buf_file); + if (rc < 0) { + fclose(buf_file); + return rc; + } + + fclose(buf_file); + + if (write(llapi_hsm_event_fd, json_buf, strlen(json_buf)) < 0) { + /* Ignore write failures due to missing reader. */ + if (errno != EPIPE) + return -errno; + } + + return 0; +} + +/** + * Hook for llapi_hsm_copytool_register and llapi_hsm_copytool_unregister + * to generate JSON events suitable for consumption by a copytool + * monitoring process. + * + * \param priv Opaque private control structure. + * \param event_type The type of event (register or unregister). + * + * \retval 0 on success. + * \retval -errno on error. + */ +static int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv, + __u32 event_type) +{ + int rc; + char agent_uuid[UUID_MAX]; + struct hsm_copytool_private *ct; + struct llapi_json_item_list *json_items; + + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) + return 0; + + if (priv == NULL || *priv == NULL) + return -EINVAL; + + ct = *priv; + if (ct->magic != CT_PRIV_MAGIC) + return -EINVAL; + + if (event_type != CT_REGISTER && event_type != CT_UNREGISTER) + return -EINVAL; + + rc = llapi_json_init_list(&json_items); + if (rc < 0) + goto err; + + rc = llapi_get_agent_uuid(ct->mnt, agent_uuid, sizeof(agent_uuid)); + if (rc < 0) + goto err; + llapi_chomp_string(agent_uuid); + + rc = llapi_json_add_item(&json_items, "uuid", LLAPI_JSON_STRING, + agent_uuid); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "mount_point", LLAPI_JSON_STRING, + ct->mnt); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "archive", LLAPI_JSON_INTEGER, + &ct->kuc->lk_data_count); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING, + (char *)llapi_hsm_ct_ev2str(event_type)); + if (rc < 0) + goto err; + + rc = llapi_hsm_write_json_event(&json_items); + if (rc < 0) + goto err; + + goto out_free; + +err: + llapi_error(LLAPI_MSG_ERROR, rc, "error in " + "llapi_hsm_log_ct_registration()"); + +out_free: + if (json_items != NULL) + llapi_json_destroy_list(&json_items); + + return rc; +} + +/** + * Given a copytool progress update, construct a JSON event suitable for + * consumption by a copytool monitoring process. + * + * Examples of various events generated here and written by + * llapi_hsm_write_json_event: + * + * Copytool registration and deregistration: + * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "REGISTER", + * "archive": 0, "mount_point": "/mnt/lustre", + * "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"} + * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "UNREGISTER", + * "archive": 0, "mount_point": "/mnt/lustre", + * "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"} + * + * An archive action, start to completion: + * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "ARCHIVE_START", + * "total_bytes": 0, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", + * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} + * {"event_time": "2014-02-26 14:50:18 -0500", "event_type": "ARCHIVE_RUNNING", + * "current_bytes": 5242880, "total_bytes": 39000000, + * "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", + * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} + * {"event_time": "2014-02-26 14:50:50 -0500", "event_type": "ARCHIVE_FINISH", + * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} + * + * A log message: + * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "LOGGED_MESSAGE", + * "level": "INFO", + * "message": "lhsmtool_posix[42]: copytool fs=lustre archive#=2 item_count=1"} + * + * \param hcp Opaque action handle returned by + * llapi_hsm_action_start. + * \param hai The hsm_action_item describing the request. + * \param progress_type The ct_progress_type describing the update. + * \param total The total expected bytes for the request. + * \param current The current copied byte count for the request. + * + * \retval 0 on success. + * \retval -errno on error. + */ +static int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp, + const struct hsm_action_item *hai, + __u32 progress_type, + __u64 total, __u64 current) +{ + int rc; + int linkno = 0; + long long recno = -1; + char lustre_path[PATH_MAX]; + char strfid[FID_NOBRACE_LEN + 1]; + struct hsm_copyaction_private *hcp; + struct llapi_json_item_list *json_items; + + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) + return 0; + + if (phcp == NULL || *phcp == NULL) + return -EINVAL; + + hcp = *phcp; + + rc = llapi_json_init_list(&json_items); + if (rc < 0) + goto err; + + snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_dfid)); + rc = llapi_json_add_item(&json_items, "data_fid", + LLAPI_JSON_STRING, strfid); + if (rc < 0) + goto err; + + snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_fid)); + rc = llapi_json_add_item(&json_items, "source_fid", + LLAPI_JSON_STRING, strfid); + if (rc < 0) + goto err; + + if (hcp->copy.hc_errval == ECANCELED) { + progress_type = CT_CANCEL; + goto cancel; + } + + if (hcp->copy.hc_errval != 0) { + progress_type = CT_ERROR; + + rc = llapi_json_add_item(&json_items, "errno", + LLAPI_JSON_INTEGER, + &hcp->copy.hc_errval); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "error", + LLAPI_JSON_STRING, + strerror(hcp->copy.hc_errval)); + if (rc < 0) + goto err; + + goto cancel; + } + + /* lustre_path isn't available after a restore completes */ + /* total_bytes isn't available after a restore or archive completes */ + if (progress_type != CT_FINISH) { + rc = llapi_fid2path(hcp->ct_priv->mnt, strfid, lustre_path, + sizeof(lustre_path), &recno, &linkno); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "lustre_path", + LLAPI_JSON_STRING, lustre_path); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "total_bytes", + LLAPI_JSON_BIGNUM, &total); + if (rc < 0) + goto err; + } + + if (progress_type == CT_RUNNING) { + rc = llapi_json_add_item(&json_items, "current_bytes", + LLAPI_JSON_BIGNUM, ¤t); + if (rc < 0) + goto err; + } + +cancel: + rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING, + (char *)llapi_hsm_ct_ev2str(hai->hai_action + + progress_type)); + if (rc < 0) + goto err; + + rc = llapi_hsm_write_json_event(&json_items); + if (rc < 0) + goto err; + + goto out_free; + +err: + llapi_error(LLAPI_MSG_ERROR, rc, "error in " + "llapi_hsm_log_ct_progress()"); + +out_free: + if (json_items != NULL) + llapi_json_destroy_list(&json_items); + + return rc; +} + +/** + * Given a path to a FIFO, create a filehandle for nonblocking writes to it. + * Intended to be used for copytool monitoring processes that read an + * event stream from the FIFO. Events written in the absence of a reader + * are lost. + * + * \param path Path to monitor FIFO. + * + * \retval 0 on success. + * \retval -errno on error. + */ +int llapi_hsm_register_event_fifo(const char *path) +{ + int read_fd; + struct stat statbuf; + struct sigaction ignore_action; + int rc; + + /* Create the FIFO if necessary. */ + if ((mkfifo(path, 0644) < 0) && (errno != EEXIST)) { + llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed", path); + return -errno; + } + if (errno == EEXIST) { + if (stat(path, &statbuf) < 0) { + llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed", + path); + return -errno; + } + if (!S_ISFIFO(statbuf.st_mode) || + ((statbuf.st_mode & 0777) != 0644)) { + llapi_error(LLAPI_MSG_ERROR, errno, "%s exists but is " + "not a pipe or has a wrong mode", path); + return -errno; + } + } else { + created_hsm_event_fifo = true; + } + + /* Open the FIFO for read so that the subsequent open for write + * doesn't immediately fail. */ + read_fd = open(path, O_RDONLY | O_NONBLOCK); + if (read_fd < 0) { + llapi_error(LLAPI_MSG_ERROR, errno, + "cannot open(%s) for read", path); + return -errno; + } + + /* Open the FIFO for writes, but don't block on waiting + * for a reader. */ + llapi_hsm_event_fd = open(path, O_WRONLY | O_NONBLOCK); + rc = -errno; + + /* Now close the reader. An external monitoring process can + * now open the FIFO for reads. If no reader comes along the + * events are lost. NOTE: Only one reader at a time! */ + close(read_fd); + + if (llapi_hsm_event_fd < 0) { + llapi_error(LLAPI_MSG_ERROR, -rc, + "cannot open(%s) for write", path); + return rc; + } + + /* Ignore SIGPIPEs -- can occur if the reader goes away. */ + memset(&ignore_action, 0, sizeof(ignore_action)); + ignore_action.sa_handler = SIG_IGN; + sigemptyset(&ignore_action.sa_mask); + sigaction(SIGPIPE, &ignore_action, NULL); + + return 0; +} + +/** + * Given a path to a FIFO, close its filehandle and delete the FIFO. + * + * \param path Path to monitor FIFO. + * + * \retval 0 on success. + * \retval -errno on error. + */ +int llapi_hsm_unregister_event_fifo(const char *path) +{ + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) + return 0; + + if (close(llapi_hsm_event_fd) < 0) + return -errno; + + if (created_hsm_event_fifo) { + unlink(path); + created_hsm_event_fifo = false; + } + + llapi_hsm_event_fd = -1; + + return 0; +} + +/** + * Custom logging callback to be used when a monitoring FIFO has been + * registered. Formats log entries as JSON events suitable for + * consumption by a copytool monitoring process. + * + * \param level The message loglevel. + * \param _rc The returncode associated with the message. + * \param fmt The message format string. + * \param args Arguments to be formatted by the format string. + * + * \retval None. + */ +void llapi_hsm_log_error(enum llapi_message_level level, int _rc, + const char *fmt, va_list args) +{ + int rc; + int msg_len; + int real_level; + char *msg = NULL; + va_list args2; + struct llapi_json_item_list *json_items; + + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) + return; + + rc = llapi_json_init_list(&json_items); + if (rc < 0) + goto err; + + if ((level & LLAPI_MSG_NO_ERRNO) == 0) { + rc = llapi_json_add_item(&json_items, "errno", + LLAPI_JSON_INTEGER, + &_rc); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "error", + LLAPI_JSON_STRING, + strerror(abs(_rc))); + if (rc < 0) + goto err; + } + + va_copy(args2, args); + msg_len = vsnprintf(NULL, 0, fmt, args2) + 1; + va_end(args2); + if (msg_len >= 0) { + msg = (char *) alloca(msg_len); + if (msg == NULL) { + rc = -ENOMEM; + goto err; + } + + rc = vsnprintf(msg, msg_len, fmt, args); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "message", + LLAPI_JSON_STRING, + msg); + if (rc < 0) + goto err; + } else { + rc = llapi_json_add_item(&json_items, "message", + LLAPI_JSON_STRING, + "INTERNAL ERROR: message failed"); + if (rc < 0) + goto err; + } + + real_level = level & LLAPI_MSG_NO_ERRNO; + real_level = real_level > 0 ? level - LLAPI_MSG_NO_ERRNO : level; + + rc = llapi_json_add_item(&json_items, "level", LLAPI_JSON_STRING, + (void *)llapi_msg_level2str(real_level)); + if (rc < 0) + goto err; + + rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING, + "LOGGED_MESSAGE"); + if (rc < 0) + goto err; + + rc = llapi_hsm_write_json_event(&json_items); + if (rc < 0) + goto err; + + goto out_free; + +err: + /* Write directly to stderr to avoid llapi_error, which now + * emits JSON event messages. */ + fprintf(stderr, "\nFATAL ERROR IN llapi_hsm_log_error(): rc %d,", rc); + +out_free: + if (json_items != NULL) + llapi_json_destroy_list(&json_items); +} /** Register a copytool - * \param[out] priv Opaque private control structure - * \param mnt Lustre filesystem mount point - * \param flags Open flags, currently unused (e.g. O_NONBLOCK) - * \param archive_count - * \param archives Which archive numbers this copytool is responsible for + * \param[out] priv Opaque private control structure + * \param mnt Lustre filesystem mount point + * \param archive_count Number of valid archive IDs in \a archives + * \param archives Which archive numbers this copytool is + * responsible for + * \param rfd_flags flags applied to read fd of pipe + * (e.g. O_NONBLOCK) + * + * \retval 0 on success. + * \retval -errno on error. */ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv, - const char *mnt, int flags, int archive_count, - int *archives) + const char *mnt, int archive_count, + int *archives, int rfd_flags) { struct hsm_copytool_private *ct; int rc; @@ -99,79 +684,112 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv, return -EINVAL; } + for (rc = 0; rc < archive_count; rc++) { + /* in the list we have an all archive wildcard + * so move to all archives mode + */ + if (archives[rc] == 0) { + archive_count = 0; + break; + } + } + ct = calloc(1, sizeof(*ct)); if (ct == NULL) return -ENOMEM; - ct->mnt_fd = open(mnt, O_DIRECTORY | O_RDONLY | O_NONBLOCK); + ct->magic = CT_PRIV_MAGIC; + ct->mnt_fd = -1; + ct->open_by_fid_fd = -1; + + ct->mnt = strdup(mnt); + if (ct->mnt == NULL) { + rc = -ENOMEM; + goto out_err; + } + + ct->kuch = calloc(1, HAL_MAXSIZE + sizeof(*ct->kuch)); + if (ct->kuch == NULL) { + rc = -ENOMEM; + goto out_err; + } + + ct->mnt_fd = open(ct->mnt, O_RDONLY); if (ct->mnt_fd < 0) { rc = -errno; goto out_err; } - ct->mnt = strdup(mnt); - if (ct->mnt == NULL) { + ct->open_by_fid_fd = openat(ct->mnt_fd, OPEN_BY_FID_PATH, O_RDONLY); + if (ct->open_by_fid_fd < 0) { + rc = -errno; + goto out_err; + } + + ct->kuc = malloc(sizeof(*ct) + archive_count * sizeof(__u32)); + if (ct->kuc == NULL) { rc = -ENOMEM; goto out_err; } - ct->magic = CT_PRIV_MAGIC; + ct->kuc->lk_rfd = LK_NOFD; + ct->kuc->lk_wfd = LK_NOFD; - /* no archives specified means "match all". */ - ct->archives = 0; + rc = libcfs_ukuc_start(ct->kuc, KUC_GRP_HSM, rfd_flags); + if (rc < 0) + goto out_free_kuc; + + ct->kuc->lk_flags = LK_FLG_DATANR; + ct->kuc->lk_data_count = archive_count; for (rc = 0; rc < archive_count; rc++) { - if (archives[rc] > 8 * sizeof(ct->archives)) { - llapi_err_noerrno(LLAPI_MSG_ERROR, - "Maximum of %d archives supported", - 8 * sizeof(ct->archives)); - goto out_err; + if (archives[rc] < 0) { + llapi_err_noerrno(LLAPI_MSG_ERROR, "%d requested when " + "archive id >= 0 is supported", + archives[rc]); + rc = -EINVAL; + goto out_kuc; } - /* in the list we have a all archive wildcard - * so move to all archives mode - */ - if (archives[rc] == 0) { - ct->archives = 0; - archive_count = 0; - break; - } - ct->archives |= (1 << (archives[rc] - 1)); - } - rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM); - if (rc < 0) - goto out_err; + ct->kuc->lk_data[rc] = archives[rc]; + } - /* Storing archive(s) in lk_data; see mdc_ioc_hsm_ct_start */ - ct->kuc.lk_data = ct->archives; - rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc); + rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc); if (rc < 0) { rc = -errno; llapi_error(LLAPI_MSG_ERROR, rc, "cannot start copytool on '%s'", mnt); - goto out_err; - } else { - rc = 0; + goto out_kuc; } - /* Only the kernel reference keeps the write side open */ - close(ct->kuc.lk_wfd); - ct->kuc.lk_wfd = LK_NOFD; - if (rc < 0) - goto out_kuc; + llapi_hsm_log_ct_registration(&ct, CT_REGISTER); + /* Only the kernel reference keeps the write side open */ + close(ct->kuc->lk_wfd); + ct->kuc->lk_wfd = LK_NOFD; *priv = ct; + return 0; out_kuc: /* cleanup the kuc channel */ - libcfs_ukuc_stop(&ct->kuc); + libcfs_ukuc_stop(ct->kuc); + +out_free_kuc: + free(ct->kuc); out_err: if (!(ct->mnt_fd < 0)) close(ct->mnt_fd); - if (ct->mnt != NULL) - free(ct->mnt); + + if (!(ct->open_by_fid_fd < 0)) + close(ct->open_by_fid_fd); + + free(ct->mnt); + + free(ct->kuch); + free(ct); + return rc; } @@ -191,27 +809,51 @@ int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv) if (ct->magic != CT_PRIV_MAGIC) return -EINVAL; + /* Close the read side of the KUC pipe. This should be done + * before unregistering to avoid deadlock: a ldlm_cb thread + * enters libcfs_kkuc_group_put() acquires kg_sem and blocks + * in pipe_write() due to full pipe; then we attempt to + * unregister and block on kg_sem. */ + libcfs_ukuc_stop(ct->kuc); + /* Tell the kernel to stop sending us messages */ - ct->kuc.lk_flags = LK_FLG_STOP; - ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc); + ct->kuc->lk_flags = LK_FLG_STOP; + ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc); - /* Shut down the kernelcomms */ - libcfs_ukuc_stop(&ct->kuc); + llapi_hsm_log_ct_registration(&ct, CT_UNREGISTER); + close(ct->open_by_fid_fd); close(ct->mnt_fd); free(ct->mnt); + free(ct->kuch); + free(ct->kuc); free(ct); *priv = NULL; return 0; } +/** Returns a file descriptor to poll/select on. + * \param ct Opaque private control structure + * \retval -EINVAL on error + * \retval the file descriptor for reading HSM events from the kernel + */ +int llapi_hsm_copytool_get_fd(struct hsm_copytool_private *ct) +{ + if (ct == NULL || ct->magic != CT_PRIV_MAGIC) + return -EINVAL; + + return libcfs_ukuc_get_rfd(ct->kuc); +} + /** Wait for the next hsm_action_list * \param ct Opaque private control structure * \param halh Action list handle, will be allocated here * \param msgsize Number of bytes in the message, will be set here * \return 0 valid message received; halh and msgsize are set * <0 error code + * Note: The application must not call llapi_hsm_copytool_recv until it has + * cleared the data in ct->kuch from the previous call. */ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, struct hsm_action_list **halh, int *msgsize) @@ -226,21 +868,20 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, if (halh == NULL || msgsize == NULL) return -EINVAL; - kuch = malloc(HAL_MAXSIZE + sizeof(*kuch)); - if (kuch == NULL) - return -ENOMEM; + kuch = ct->kuch; - rc = libcfs_ukuc_msg_get(&ct->kuc, (char *)kuch, +repeat: + rc = libcfs_ukuc_msg_get(ct->kuc, (char *)kuch, HAL_MAXSIZE + sizeof(*kuch), KUC_TRANSPORT_HSM); if (rc < 0) - goto out_free; + goto out_err; /* Handle generic messages */ if (kuch->kuc_transport == KUC_TRANSPORT_GENERIC && kuch->kuc_msgtype == KUC_MSG_SHUTDOWN) { rc = -ESHUTDOWN; - goto out_free; + goto out_err; } if (kuch->kuc_transport != KUC_TRANSPORT_HSM || @@ -249,14 +890,14 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, "Unknown HSM message type %d:%d\n", kuch->kuc_transport, kuch->kuc_msgtype); rc = -EPROTO; - goto out_free; + goto out_err; } if (kuch->kuc_msglen < sizeof(*kuch) + sizeof(*hal)) { llapi_err_noerrno(LLAPI_MSG_ERROR, "Short HSM message %d", kuch->kuc_msglen); rc = -EPROTO; - goto out_free; + goto out_err; } /* Our message is a hsm_action_list. Use pointer math to skip @@ -266,36 +907,28 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, /* Check that we have registered for this archive # * if 0 registered, we serve any archive */ - if (ct->archives && - ((1 << (hal->hal_archive_id - 1)) & ct->archives) == 0) { - llapi_err_noerrno(LLAPI_MSG_INFO, - "This copytool does not service archive #%d," - " ignoring this request." - " Mask of served archive is 0x%.8X", - hal->hal_archive_id, ct->archives); - rc = -EAGAIN; + if (ct->kuc != NULL && ct->kuc->lk_data_count != 0) { + int i; + + for (i = 0; i < ct->kuc->lk_data_count; i++) { + if (hal->hal_archive_id == ct->kuc->lk_data[i]) + break; + } - goto out_free; + if (i >= ct->kuc->lk_data_count) + goto repeat; } *halh = hal; *msgsize = kuch->kuc_msglen - sizeof(*kuch); return 0; -out_free: +out_err: *halh = NULL; *msgsize = 0; - free(kuch); return rc; } -/** Release the action list when done with it. */ -void llapi_hsm_action_list_free(struct hsm_action_list **hal) -{ - /* Reuse the llapi_changelog_free function */ - llapi_changelog_free((struct changelog_ext_rec **)hal); -} - /** Get parent path from mount point and fid. * * \param mnt Filesystem root path. @@ -304,7 +937,7 @@ void llapi_hsm_action_list_free(struct hsm_action_list **hal) * \param parent_len Destination buffer size. * \return 0 on success. */ -static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent, +static int fid_parent(const char *mnt, const struct lu_fid *fid, char *parent, size_t parent_len) { int rc; @@ -338,12 +971,73 @@ static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent, return rc; } +static int ct_open_by_fid(const struct hsm_copytool_private *ct, + const struct lu_fid *fid, int open_flags) +{ + char fid_name[FID_NOBRACE_LEN + 1]; + int fd; + + snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid)); + + fd = openat(ct->open_by_fid_fd, fid_name, open_flags); + return fd < 0 ? -errno : fd; +} + +/** + * Get metadata attributes of file by FID. + * + * Use the IOC_MDC_GETFILEINFO ioctl (to send a MDS_GETATTR_NAME RPC) + * to get the attributes of the file identified by \a fid. This + * returns only the attributes stored on the MDT and avoids taking + * layout locks or accessing OST objects. It also bypasses the inode + * cache. Attributes are returned in \a st. + */ +static int ct_md_getattr(const struct hsm_copytool_private *ct, + const struct lu_fid *fid, + lstatx_t *stx) +{ + struct lov_user_mds_data *lmd; + char fname[FID_NOBRACE_LEN + 1] = ""; + size_t lmd_size; + int rc; + + rc = snprintf(fname, sizeof(fname), DFID_NOBRACE, PFID(fid)); + if (rc < 0) + return rc; + if (rc >= sizeof(fname) || rc == 0) + return -EINVAL; + + lmd_size = offsetof(typeof(*lmd), lmd_lmm) + + lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3); + + if (lmd_size < offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX) + lmd_size = offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX; + + lmd = malloc(lmd_size); + if (lmd == NULL) + return -ENOMEM; + + rc = get_lmd_info_fd(fname, ct->open_by_fid_fd, -1, + lmd, lmd_size, GET_LMD_INFO); + if (rc) + goto out; + + *stx = lmd->lmd_stx; +out: + free(lmd); + + return rc; +} + /** Create the destination volatile file for a restore operation. * - * \param hcp Private copyaction handle. + * \param hcp Private copyaction handle. + * \param mdt_index MDT index where to create the volatile file. + * \param flags Volatile file creation flags. * \return 0 on success. */ -static int create_restore_volatile(struct hsm_copyaction_private *hcp) +static int create_restore_volatile(struct hsm_copyaction_private *hcp, + int mdt_index, int open_flags) { int rc; int fd; @@ -355,15 +1049,19 @@ static int create_restore_volatile(struct hsm_copyaction_private *hcp) if (rc < 0) { /* fid_parent() failed, try to keep on going */ llapi_error(LLAPI_MSG_ERROR, rc, - "cannot get parent path to restore "DFID + "cannot get parent path to restore "DFID" " "using '%s'", PFID(&hai->hai_fid), mnt); snprintf(parent, sizeof(parent), "%s", mnt); } - fd = llapi_create_volatile_idx(parent, 0, O_LOV_DELAY_CREATE); + fd = llapi_create_volatile_idx(parent, mdt_index, open_flags); if (fd < 0) return fd; + rc = fchown(fd, hcp->statx.stx_uid, hcp->statx.stx_gid); + if (rc < 0) + goto err_cleanup; + rc = llapi_fd2fid(fd, &hai->hai_dfid); if (rc < 0) goto err_cleanup; @@ -384,25 +1082,34 @@ err_cleanup: * It could be skipped if copytool only want to directly report an error, * \see llapi_hsm_action_end(). * - * \param hcp Opaque action handle to be passed to - * llapi_hsm_action_progress and llapi_hsm_action_end. - * \param ct Copytool handle acquired at registration. - * \param hai The hsm_action_item describing the request. - * \param is_error Whether this call is just to report an error. + * \param hcp Opaque action handle to be passed to + * llapi_hsm_action_progress and llapi_hsm_action_end. + * \param ct Copytool handle acquired at registration. + * \param hai The hsm_action_item describing the request. + * \param restore_mdt_index On restore: MDT index where to create the volatile + * file. Use -1 for default. + * \param restore_open_flags On restore: volatile file creation mode. Use + * O_LOV_DELAY_CREATE to manually set the LOVEA + * afterwards. + * \param is_error Whether this call is just to report an error. * * \return 0 on success. */ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp, const struct hsm_copytool_private *ct, - const struct hsm_action_item *hai, bool is_error) + const struct hsm_action_item *hai, + int restore_mdt_index, int restore_open_flags, + bool is_error) { - struct hsm_copyaction_private *hcp; - int rc; + struct hsm_copyaction_private *hcp; + int fd; + int rc; hcp = calloc(1, sizeof(*hcp)); if (hcp == NULL) return -ENOMEM; + hcp->source_fd = -1; hcp->data_fd = -1; hcp->ct_priv = ct; hcp->copy.hc_hai = *hai; @@ -411,10 +1118,58 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp, if (is_error) goto ok_out; - if (hai->hai_action == HSMA_RESTORE) { - rc = create_restore_volatile(hcp); + if (hai->hai_action == HSMA_ARCHIVE) { + fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid, + O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK); + if (fd < 0) { + rc = fd; + goto err_out; + } + + hcp->source_fd = fd; + } else if (hai->hai_action == HSMA_RESTORE) { + rc = ct_md_getattr(hcp->ct_priv, &hai->hai_fid, &hcp->statx); + if (rc < 0) + goto err_out; + + rc = create_restore_volatile(hcp, restore_mdt_index, + restore_open_flags); if (rc < 0) goto err_out; + } else if (hai->hai_action == HSMA_REMOVE) { + /* Since remove is atomic there is no need to send an + * initial MDS_HSM_PROGRESS RPC. + * RW-PCC uses Lustre HSM mechanism for data synchronization. + * At the beginning of RW-PCC attach, the client tries to + * exclusively open the file by using a lease lock. A + * successful lease open ensures that the current attach + * process is the unique opener for the file. + * After taking the lease, the file data is then copied from + * OSTs into PCC and then the client closes the lease with + * with a PCC attach intent. + * However, for a file with HSM exists, archived state (i.e. a + * cached file just was detached from PCC and restore into + * OST), a HSM REMOVE request may delete the above PCC copy + * during RW-PCC attach wrongly. + * Thus, a open/close on the corresponding Lustre file is added + * for HSMA_REMOVE here to solve this conflict. + */ + fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_fid, + O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK); + if (fd < 0) { + rc = fd; + /* ignore the error in case of Remove Archive on Last + * Unlink (RAoLU). + */ + if (rc == -ENOENT) { + rc = 0; + goto out_log; + } + goto err_out; + } + + hcp->source_fd = fd; + goto out_log; } rc = ioctl(ct->mnt_fd, LL_IOC_HSM_COPY_START, &hcp->copy); @@ -423,12 +1178,18 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp, goto err_out; } +out_log: + llapi_hsm_log_ct_progress(&hcp, hai, CT_START, 0, 0); + ok_out: hcp->magic = CP_PRIV_MAGIC; *phcp = hcp; return 0; err_out: + if (!(hcp->source_fd < 0)) + close(hcp->source_fd); + if (!(hcp->data_fd < 0)) close(hcp->data_fd); @@ -448,7 +1209,7 @@ err_out: * \return 0 on success. */ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp, - const struct hsm_extent *he, int flags, int errval) + const struct hsm_extent *he, int hp_flags, int errval) { struct hsm_copyaction_private *hcp; struct hsm_action_item *hai; @@ -464,6 +1225,31 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp, hai = &hcp->copy.hc_hai; + if (hai->hai_action == HSMA_RESTORE && errval == 0) { + struct ll_futimes_3 lfu = { + .lfu_atime_sec = hcp->statx.stx_atime.tv_sec, + .lfu_atime_nsec = hcp->statx.stx_atime.tv_nsec, + .lfu_mtime_sec = hcp->statx.stx_mtime.tv_sec, + .lfu_mtime_nsec = hcp->statx.stx_mtime.tv_nsec, + .lfu_ctime_sec = hcp->statx.stx_ctime.tv_sec, + .lfu_ctime_nsec = hcp->statx.stx_ctime.tv_nsec, + }; + + rc = fsync(hcp->data_fd); + if (rc < 0) { + errval = -errno; + goto end; + } + + /* Set {a,m,c}time of volatile file to that of original. */ + rc = ioctl(hcp->data_fd, LL_IOC_FUTIMES_3, &lfu); + if (rc < 0) { + errval = -errno; + goto end; + } + } + +end: /* In some cases, like restore, 2 FIDs are used. * Set the right FID to use here. */ if (hai->hai_action == HSMA_ARCHIVE || hai->hai_action == HSMA_RESTORE) @@ -471,7 +1257,7 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp, /* Fill the last missing data that will be needed by * kernel to send a hsm_progress. */ - hcp->copy.hc_flags = flags; + hcp->copy.hc_flags = hp_flags; hcp->copy.hc_errval = abs(errval); hcp->copy.hc_hai.hai_extent = *he; @@ -482,7 +1268,12 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp, goto err_cleanup; } + llapi_hsm_log_ct_progress(&hcp, hai, CT_FINISH, 0, 0); + err_cleanup: + if (!(hcp->source_fd < 0)) + close(hcp->source_fd); + if (!(hcp->data_fd < 0)) close(hcp->data_fd); @@ -495,11 +1286,13 @@ err_cleanup: /** Notify a progress in processing an HSM action. * \param hdl[in,out] handle returned by llapi_hsm_action_start. * \param he[in] the range of copied data (for copy actions). + * \param total[in] the expected total of copied data (for copy actions). * \param hp_flags[in] HSM progress flags. * \return 0 on success. */ int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp, - const struct hsm_extent *he, int hp_flags) + const struct hsm_extent *he, __u64 total, + int hp_flags) { int rc; struct hsm_progress hp; @@ -526,6 +1319,8 @@ int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp, if (rc < 0) rc = -errno; + llapi_hsm_log_ct_progress(&hcp, hai, CT_RUNNING, total, he->length); + return rc; } @@ -533,7 +1328,7 @@ int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp, * @return error code if the action is not a copy operation. */ int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp, - lustre_fid *fid) + struct lu_fid *fid) { const struct hsm_action_item *hai = &hcp->copy.hc_hai; @@ -558,14 +1353,20 @@ int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp, int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp) { const struct hsm_action_item *hai = &hcp->copy.hc_hai; + int fd; if (hcp->magic != CP_PRIV_MAGIC) return -EINVAL; - if (hai->hai_action != HSMA_RESTORE) + if (hai->hai_action == HSMA_ARCHIVE) { + fd = dup(hcp->source_fd); + return fd < 0 ? -errno : fd; + } else if (hai->hai_action == HSMA_RESTORE) { + fd = dup(hcp->data_fd); + return fd < 0 ? -errno : fd; + } else { return -EINVAL; - - return dup(hcp->data_fd); + } } /** @@ -584,7 +1385,7 @@ int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp) int llapi_hsm_import(const char *dst, int archive, const struct stat *st, unsigned long long stripe_size, int stripe_offset, int stripe_count, int stripe_pattern, char *pool_name, - lustre_fid *newfid) + struct lu_fid *newfid) { struct hsm_user_import hui; int fd; @@ -599,9 +1400,9 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st, stripe_pattern | LOV_PATTERN_F_RELEASED, pool_name); if (fd < 0) { - llapi_error(LLAPI_MSG_ERROR, -errno, + llapi_error(LLAPI_MSG_ERROR, fd, "cannot create '%s' for import", dst); - return -errno; + return fd; } /* Get the new fid in Lustre. Caller needs to use this fid @@ -624,8 +1425,8 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st, hui.hui_mtime_ns = st->st_mtim.tv_nsec; rc = ioctl(fd, LL_IOC_HSM_IMPORT, &hui); if (rc != 0) { - llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst); rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst); goto out_unlink; }