X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Fliblustreapi_hsm.c;h=8d9938bfb8d69309aef4b32fa09e76e49c062723;hb=186b97e68abbc45c0e8d5ae7e2a0d10aaa918db6;hp=3f1b8b9fe2282b39dce381a652050a327c4bfb17;hpb=45c9ef1dfff207086665c764c7d500c00aa03c7f;p=fs%2Flustre-release.git diff --git a/lustre/utils/liblustreapi_hsm.c b/lustre/utils/liblustreapi_hsm.c index 3f1b8b9..8d9938b 100644 --- a/lustre/utils/liblustreapi_hsm.c +++ b/lustre/utils/liblustreapi_hsm.c @@ -6,6 +6,8 @@ * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies * alternatives * + * Copyright (c) 2013, 2017, Intel Corporation. + * * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser General Public License * (LGPL) version 2.1 or (at your discretion) any later version. @@ -30,6 +32,7 @@ * Author: Henri Doreau */ +#include #include #include #include @@ -41,11 +44,12 @@ #include #include #include +#include #include +#include #include #include #include -#include #include #ifdef HAVE_LINUX_UNISTD_H #include @@ -53,9 +57,7 @@ #include #endif -#include -#include -#include +#include #include #include "lustreapi_internal.h" @@ -64,25 +66,24 @@ /****** HSM Copytool API ********/ #define CT_PRIV_MAGIC 0xC0BE2001 struct hsm_copytool_private { - int magic; - char *mnt; - int mnt_fd; - int open_by_fid_fd; - lustre_kernelcomm kuc; - __u32 archives; + int magic; + char *mnt; + struct kuc_hdr *kuch; + int mnt_fd; + int open_by_fid_fd; + struct lustre_kernelcomm *kuc; }; #define CP_PRIV_MAGIC 0x19880429 struct hsm_copyaction_private { __u32 magic; + __u32 source_fd; __s32 data_fd; const struct hsm_copytool_private *ct_priv; struct hsm_copy copy; - struct stat stat; + lstatx_t statx; }; -#include - enum ct_progress_type { CT_START = 0, CT_RUNNING = 50, @@ -113,7 +114,8 @@ enum ct_event { }; /* initialized in llapi_hsm_register_event_fifo() */ -FILE *llapi_hsm_event_fp; +static int llapi_hsm_event_fd = -1; +static bool 
created_hsm_event_fifo; static inline const char *llapi_hsm_ct_ev2str(int type) { @@ -169,16 +171,18 @@ static inline const char *llapi_hsm_ct_ev2str(int type) * \retval 0 on success. * \retval -errno on error. */ -int llapi_hsm_write_json_event(struct llapi_json_item_list **event) +static int llapi_hsm_write_json_event(struct llapi_json_item_list **event) { int rc; char time_string[40]; + char json_buf[PIPE_BUF]; + FILE *buf_file; time_t event_time = time(0); struct tm time_components; struct llapi_json_item_list *json_items; - /* Noop unless the event fp was initialized */ - if (llapi_hsm_event_fp == NULL) + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) return 0; if (event == NULL || *event == NULL) @@ -203,21 +207,24 @@ int llapi_hsm_write_json_event(struct llapi_json_item_list **event) return rc; } - rc = llapi_json_write_list(event, llapi_hsm_event_fp); - if (rc < 0) { - /* Ignore write failures due to missing reader. */ - if (rc == -EPIPE) - return 0; + buf_file = fmemopen(json_buf, sizeof(json_buf), "w"); + if (buf_file == NULL) + return -errno; - /* Skip llapi_error() here because there's no point - * in creating a JSON-formatted error message about - * failing to write a JSON-formatted message. - */ - fprintf(stderr, - "\nFATAL ERROR IN llapi_hsm_write_list(): rc %d", rc); + rc = llapi_json_write_list(event, buf_file); + if (rc < 0) { + fclose(buf_file); return rc; } + fclose(buf_file); + + if (write(llapi_hsm_event_fd, json_buf, strlen(json_buf)) < 0) { + /* Ignore write failures due to missing reader. */ + if (errno != EPIPE) + return -errno; + } + return 0; } @@ -232,14 +239,18 @@ int llapi_hsm_write_json_event(struct llapi_json_item_list **event) * \retval 0 on success. * \retval -errno on error. 
*/ -int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv, - __u32 event_type) +static int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv, + __u32 event_type) { int rc; char agent_uuid[UUID_MAX]; struct hsm_copytool_private *ct; struct llapi_json_item_list *json_items; + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) + return 0; + if (priv == NULL || *priv == NULL) return -EINVAL; @@ -270,7 +281,7 @@ int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv, goto err; rc = llapi_json_add_item(&json_items, "archive", LLAPI_JSON_INTEGER, - &ct->archives); + &ct->kuc->lk_data_count); if (rc < 0) goto err; @@ -304,16 +315,28 @@ out_free: * llapi_hsm_write_json_event: * * Copytool registration and deregistration: - * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "REGISTER", "archive": 0, "mount_point": "/mnt/lustre", "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"} - * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "UNREGISTER", "archive": 0, "mount_point": "/mnt/lustre", "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"} + * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "REGISTER", + * "archive": 0, "mount_point": "/mnt/lustre", + * "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"} + * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "UNREGISTER", + * "archive": 0, "mount_point": "/mnt/lustre", + * "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"} * * An archive action, start to completion: - * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "ARCHIVE_START", "total_bytes": 0, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} - * {"event_time": "2014-02-26 14:50:18 -0500", "event_type": "ARCHIVE_RUNNING", "current_bytes": 5242880, "total_bytes": 39000000, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} - * 
{"event_time": "2014-02-26 14:50:50 -0500", "event_type": "ARCHIVE_FINISH", "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} + * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "ARCHIVE_START", + * "total_bytes": 0, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", + * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} + * {"event_time": "2014-02-26 14:50:18 -0500", "event_type": "ARCHIVE_RUNNING", + * "current_bytes": 5242880, "total_bytes": 39000000, + * "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", + * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} + * {"event_time": "2014-02-26 14:50:50 -0500", "event_type": "ARCHIVE_FINISH", + * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"} * * A log message: - * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "LOGGED_MESSAGE", "level": "INFO", "message": "lhsmtool_posix[59401]: copytool fs=lustre archive#=2 item_count=1"} + * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "LOGGED_MESSAGE", + * "level": "INFO", + * "message": "lhsmtool_posix[42]: copytool fs=lustre archive#=2 item_count=1"} * * \param hcp Opaque action handle returned by * llapi_hsm_action_start. @@ -325,9 +348,10 @@ out_free: * \retval 0 on success. * \retval -errno on error. 
*/ -int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp, - const struct hsm_action_item *hai, __u32 progress_type, - __u64 total, __u64 current) +static int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp, + const struct hsm_action_item *hai, + __u32 progress_type, + __u64 total, __u64 current) { int rc; int linkno = 0; @@ -337,6 +361,10 @@ int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp, struct hsm_copyaction_private *hcp; struct llapi_json_item_list *json_items; + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) + return 0; + if (phcp == NULL || *phcp == NULL) return -EINVAL; @@ -400,11 +428,12 @@ int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp, goto err; } - if (progress_type == CT_RUNNING) + if (progress_type == CT_RUNNING) { rc = llapi_json_add_item(&json_items, "current_bytes", LLAPI_JSON_BIGNUM, &current); if (rc < 0) goto err; + } cancel: rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING, @@ -441,10 +470,12 @@ out_free: * \retval 0 on success. * \retval -errno on error. */ -int llapi_hsm_register_event_fifo(char *path) +int llapi_hsm_register_event_fifo(const char *path) { - int read_fd, write_fd; + int read_fd; struct stat statbuf; + struct sigaction ignore_action; + int rc; /* Create the FIFO if necessary. */ if ((mkfifo(path, 0644) < 0) && (errno != EEXIST)) { @@ -463,6 +494,8 @@ int llapi_hsm_register_event_fifo(char *path) "not a pipe or has a wrong mode", path); return -errno; } + } else { + created_hsm_event_fifo = true; } /* Open the FIFO for read so that the subsequent open for write @@ -476,30 +509,25 @@ int llapi_hsm_register_event_fifo(char *path) /* Open the FIFO for writes, but don't block on waiting * for a reader. 
*/ - write_fd = open(path, O_WRONLY | O_NONBLOCK); - if (write_fd < 0) { - llapi_error(LLAPI_MSG_ERROR, errno, - "cannot open(%s) for write", path); - return -errno; - } + llapi_hsm_event_fd = open(path, O_WRONLY | O_NONBLOCK); + rc = -errno; /* Now close the reader. An external monitoring process can * now open the FIFO for reads. If no reader comes along the * events are lost. NOTE: Only one reader at a time! */ close(read_fd); - llapi_hsm_event_fp = fdopen(write_fd, "w"); - if (llapi_hsm_event_fp == NULL) { - llapi_error(LLAPI_MSG_ERROR, errno, - "cannot fdopen(%s) for write", path); - return -errno; + if (llapi_hsm_event_fd < 0) { + llapi_error(LLAPI_MSG_ERROR, -rc, + "cannot open(%s) for write", path); + return rc; } /* Ignore SIGPIPEs -- can occur if the reader goes away. */ - signal(SIGPIPE, SIG_IGN); - - /* Don't buffer the event stream. */ - setbuf(llapi_hsm_event_fp, NULL); + memset(&ignore_action, 0, sizeof(ignore_action)); + ignore_action.sa_handler = SIG_IGN; + sigemptyset(&ignore_action.sa_mask); + sigaction(SIGPIPE, &ignore_action, NULL); return 0; } @@ -512,18 +540,21 @@ int llapi_hsm_register_event_fifo(char *path) * \retval 0 on success. * \retval -errno on error. 
*/ -int llapi_hsm_unregister_event_fifo(char *path) +int llapi_hsm_unregister_event_fifo(const char *path) { - /* Noop unless the event fp was initialized */ - if (llapi_hsm_event_fp == NULL) + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) return 0; - if (fclose(llapi_hsm_event_fp) != 0) + if (close(llapi_hsm_event_fd) < 0) return -errno; - unlink(path); + if (created_hsm_event_fifo) { + unlink(path); + created_hsm_event_fifo = false; + } - llapi_hsm_event_fp = NULL; + llapi_hsm_event_fd = -1; return 0; } @@ -550,8 +581,8 @@ void llapi_hsm_log_error(enum llapi_message_level level, int _rc, va_list args2; struct llapi_json_item_list *json_items; - /* Noop unless the event fp was initialized */ - if (llapi_hsm_event_fp == NULL) + /* Noop unless the event fd was initialized */ + if (llapi_hsm_event_fd < 0) return; rc = llapi_json_init_list(&json_items); @@ -626,20 +657,23 @@ err: out_free: if (json_items != NULL) llapi_json_destroy_list(&json_items); - - return; } /** Register a copytool - * \param[out] priv Opaque private control structure - * \param mnt Lustre filesystem mount point - * \param flags Open flags, currently unused (e.g. O_NONBLOCK) - * \param archive_count - * \param archives Which archive numbers this copytool is responsible for + * \param[out] priv Opaque private control structure + * \param mnt Lustre filesystem mount point + * \param archive_count Number of valid archive IDs in \a archives + * \param archives Which archive numbers this copytool is + * responsible for + * \param rfd_flags flags applied to read fd of pipe + * (e.g. O_NONBLOCK) + * + * \retval 0 on success. + * \retval -errno on error. 
*/ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv, - const char *mnt, int flags, int archive_count, - int *archives) + const char *mnt, int archive_count, + int *archives, int rfd_flags) { struct hsm_copytool_private *ct; int rc; @@ -650,6 +684,16 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv, return -EINVAL; } + for (rc = 0; rc < archive_count; rc++) { + /* in the list we have an all archive wildcard + * so move to all archives mode + */ + if (archives[rc] == 0) { + archive_count = 0; + break; + } + } + ct = calloc(1, sizeof(*ct)); if (ct == NULL) return -ENOMEM; @@ -657,8 +701,6 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv, ct->magic = CT_PRIV_MAGIC; ct->mnt_fd = -1; ct->open_by_fid_fd = -1; - ct->kuc.lk_rfd = LK_NOFD; - ct->kuc.lk_wfd = LK_NOFD; ct->mnt = strdup(mnt); if (ct->mnt == NULL) { @@ -666,6 +708,12 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv, goto out_err; } + ct->kuch = calloc(1, HAL_MAXSIZE + sizeof(*ct->kuch)); + if (ct->kuch == NULL) { + rc = -ENOMEM; + goto out_err; + } + ct->mnt_fd = open(ct->mnt, O_RDONLY); if (ct->mnt_fd < 0) { rc = -errno; @@ -678,56 +726,56 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv, goto out_err; } - /* no archives specified means "match all". 
*/ - ct->archives = 0; - for (rc = 0; rc < archive_count; rc++) { - if (archives[rc] > 8 * sizeof(ct->archives)) { - llapi_err_noerrno(LLAPI_MSG_ERROR, - "maximum of %zu archives supported", - 8 * sizeof(ct->archives)); - goto out_err; - } - /* in the list we have a all archive wildcard - * so move to all archives mode - */ - if (archives[rc] == 0) { - ct->archives = 0; - archive_count = 0; - break; - } - ct->archives |= (1 << (archives[rc] - 1)); + ct->kuc = malloc(sizeof(*ct) + archive_count * sizeof(__u32)); + if (ct->kuc == NULL) { + rc = -ENOMEM; + goto out_err; } - rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM); + ct->kuc->lk_rfd = LK_NOFD; + ct->kuc->lk_wfd = LK_NOFD; + + rc = libcfs_ukuc_start(ct->kuc, KUC_GRP_HSM, rfd_flags); if (rc < 0) - goto out_err; + goto out_free_kuc; + + ct->kuc->lk_flags = LK_FLG_DATANR; + ct->kuc->lk_data_count = archive_count; + for (rc = 0; rc < archive_count; rc++) { + if (archives[rc] < 0) { + llapi_err_noerrno(LLAPI_MSG_ERROR, "%d requested when " + "archive id >= 0 is supported", + archives[rc]); + rc = -EINVAL; + goto out_kuc; + } + + ct->kuc->lk_data[rc] = archives[rc]; + } - /* Storing archive(s) in lk_data; see mdc_ioc_hsm_ct_start */ - ct->kuc.lk_data = ct->archives; - rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc); + rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc); if (rc < 0) { rc = -errno; llapi_error(LLAPI_MSG_ERROR, rc, "cannot start copytool on '%s'", mnt); - goto out_err; - } else { - rc = 0; + goto out_kuc; } llapi_hsm_log_ct_registration(&ct, CT_REGISTER); /* Only the kernel reference keeps the write side open */ - close(ct->kuc.lk_wfd); - ct->kuc.lk_wfd = LK_NOFD; - if (rc < 0) - goto out_kuc; - + close(ct->kuc->lk_wfd); + ct->kuc->lk_wfd = LK_NOFD; *priv = ct; + return 0; out_kuc: /* cleanup the kuc channel */ - libcfs_ukuc_stop(&ct->kuc); + libcfs_ukuc_stop(ct->kuc); + +out_free_kuc: + free(ct->kuc); out_err: if (!(ct->mnt_fd < 0)) @@ -736,8 +784,9 @@ out_err: if (!(ct->open_by_fid_fd < 0)) 
close(ct->open_by_fid_fd); - if (ct->mnt != NULL) - free(ct->mnt); + free(ct->mnt); + + free(ct->kuch); free(ct); @@ -760,30 +809,51 @@ int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv) if (ct->magic != CT_PRIV_MAGIC) return -EINVAL; - /* Tell the kernel to stop sending us messages */ - ct->kuc.lk_flags = LK_FLG_STOP; - ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc); + /* Close the read side of the KUC pipe. This should be done + * before unregistering to avoid deadlock: a ldlm_cb thread + * enters libcfs_kkuc_group_put() acquires kg_sem and blocks + * in pipe_write() due to full pipe; then we attempt to + * unregister and block on kg_sem. */ + libcfs_ukuc_stop(ct->kuc); - /* Shut down the kernelcomms */ - libcfs_ukuc_stop(&ct->kuc); + /* Tell the kernel to stop sending us messages */ + ct->kuc->lk_flags = LK_FLG_STOP; + ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc); llapi_hsm_log_ct_registration(&ct, CT_UNREGISTER); close(ct->open_by_fid_fd); close(ct->mnt_fd); free(ct->mnt); + free(ct->kuch); + free(ct->kuc); free(ct); *priv = NULL; return 0; } +/** Returns a file descriptor to poll/select on. + * \param ct Opaque private control structure + * \retval -EINVAL on error + * \retval the file descriptor for reading HSM events from the kernel + */ +int llapi_hsm_copytool_get_fd(struct hsm_copytool_private *ct) +{ + if (ct == NULL || ct->magic != CT_PRIV_MAGIC) + return -EINVAL; + + return libcfs_ukuc_get_rfd(ct->kuc); +} + /** Wait for the next hsm_action_list * \param ct Opaque private control structure * \param halh Action list handle, will be allocated here * \param msgsize Number of bytes in the message, will be set here * \return 0 valid message received; halh and msgsize are set * <0 error code + * Note: The application must not call llapi_hsm_copytool_recv until it has + * cleared the data in ct->kuch from the previous call. 
*/ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, struct hsm_action_list **halh, int *msgsize) @@ -798,21 +868,20 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, if (halh == NULL || msgsize == NULL) return -EINVAL; - kuch = malloc(HAL_MAXSIZE + sizeof(*kuch)); - if (kuch == NULL) - return -ENOMEM; + kuch = ct->kuch; - rc = libcfs_ukuc_msg_get(&ct->kuc, (char *)kuch, +repeat: + rc = libcfs_ukuc_msg_get(ct->kuc, (char *)kuch, HAL_MAXSIZE + sizeof(*kuch), KUC_TRANSPORT_HSM); if (rc < 0) - goto out_free; + goto out_err; /* Handle generic messages */ if (kuch->kuc_transport == KUC_TRANSPORT_GENERIC && kuch->kuc_msgtype == KUC_MSG_SHUTDOWN) { rc = -ESHUTDOWN; - goto out_free; + goto out_err; } if (kuch->kuc_transport != KUC_TRANSPORT_HSM || @@ -821,14 +890,14 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, "Unknown HSM message type %d:%d\n", kuch->kuc_transport, kuch->kuc_msgtype); rc = -EPROTO; - goto out_free; + goto out_err; } if (kuch->kuc_msglen < sizeof(*kuch) + sizeof(*hal)) { llapi_err_noerrno(LLAPI_MSG_ERROR, "Short HSM message %d", kuch->kuc_msglen); rc = -EPROTO; - goto out_free; + goto out_err; } /* Our message is a hsm_action_list. Use pointer math to skip @@ -838,36 +907,28 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct, /* Check that we have registered for this archive # * if 0 registered, we serve any archive */ - if (ct->archives && - ((1 << (hal->hal_archive_id - 1)) & ct->archives) == 0) { - llapi_err_noerrno(LLAPI_MSG_INFO, - "This copytool does not service archive #%d," - " ignoring this request." 
- " Mask of served archive is 0x%.8X", - hal->hal_archive_id, ct->archives); - rc = -EAGAIN; - - goto out_free; + if (ct->kuc != NULL && ct->kuc->lk_data_count != 0) { + int i; + + for (i = 0; i < ct->kuc->lk_data_count; i++) { + if (hal->hal_archive_id == ct->kuc->lk_data[i]) + break; + } + + if (i >= ct->kuc->lk_data_count) + goto repeat; } *halh = hal; *msgsize = kuch->kuc_msglen - sizeof(*kuch); return 0; -out_free: +out_err: *halh = NULL; *msgsize = 0; - free(kuch); return rc; } -/** Release the action list when done with it. */ -void llapi_hsm_action_list_free(struct hsm_action_list **hal) -{ - /* Reuse the llapi_changelog_free function */ - llapi_changelog_free((struct changelog_ext_rec **)hal); -} - /** Get parent path from mount point and fid. * * \param mnt Filesystem root path. @@ -876,7 +937,7 @@ void llapi_hsm_action_list_free(struct hsm_action_list **hal) * \param parent_len Destination buffer size. * \return 0 on success. */ -static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent, +static int fid_parent(const char *mnt, const struct lu_fid *fid, char *parent, size_t parent_len) { int rc; @@ -914,21 +975,58 @@ static int ct_open_by_fid(const struct hsm_copytool_private *ct, const struct lu_fid *fid, int open_flags) { char fid_name[FID_NOBRACE_LEN + 1]; + int fd; snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid)); - return openat(ct->open_by_fid_fd, fid_name, open_flags); + fd = openat(ct->open_by_fid_fd, fid_name, open_flags); + return fd < 0 ? -errno : fd; } -static int ct_stat_by_fid(const struct hsm_copytool_private *ct, - const struct lu_fid *fid, - struct stat *buf) +/** + * Get metadata attributes of file by FID. + * + * Use the IOC_MDC_GETFILEINFO ioctl (to send a MDS_GETATTR_NAME RPC) + * to get the attributes of the file identified by \a fid. This + * returns only the attributes stored on the MDT and avoids taking + * layout locks or accessing OST objects. It also bypasses the inode + * cache. 
Attributes are returned in \a stx. + */ +static int ct_md_getattr(const struct hsm_copytool_private *ct, + const struct lu_fid *fid, + lstatx_t *stx) { - char fid_name[FID_NOBRACE_LEN + 1]; + struct lov_user_mds_data *lmd; + char fname[FID_NOBRACE_LEN + 1] = ""; + size_t lmd_size; + int rc; - snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid)); + rc = snprintf(fname, sizeof(fname), DFID_NOBRACE, PFID(fid)); + if (rc < 0) + return rc; + if (rc >= sizeof(fname) || rc == 0) + return -EINVAL; - return fstatat(ct->open_by_fid_fd, fid_name, buf, 0); + lmd_size = offsetof(typeof(*lmd), lmd_lmm) + + lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3); + + if (lmd_size < offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX) + lmd_size = offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX; + + lmd = malloc(lmd_size); + if (lmd == NULL) + return -ENOMEM; + + rc = get_lmd_info_fd(fname, ct->open_by_fid_fd, -1, + lmd, lmd_size, GET_LMD_INFO); + if (rc) + goto out; + + *stx = lmd->lmd_stx; +out: + free(lmd); + + return rc; } /** Create the destination volatile file for a restore operation. 
@@ -960,7 +1058,7 @@ static int create_restore_volatile(struct hsm_copyaction_private *hcp, if (fd < 0) return fd; - rc = fchown(fd, hcp->stat.st_uid, hcp->stat.st_gid); + rc = fchown(fd, hcp->statx.stx_uid, hcp->statx.stx_gid); if (rc < 0) goto err_cleanup; @@ -1003,13 +1101,15 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp, int restore_mdt_index, int restore_open_flags, bool is_error) { - struct hsm_copyaction_private *hcp; - int rc; + struct hsm_copyaction_private *hcp; + int fd; + int rc; hcp = calloc(1, sizeof(*hcp)); if (hcp == NULL) return -ENOMEM; + hcp->source_fd = -1; hcp->data_fd = -1; hcp->ct_priv = ct; hcp->copy.hc_hai = *hai; @@ -1018,8 +1118,17 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp, if (is_error) goto ok_out; - if (hai->hai_action == HSMA_RESTORE) { - rc = ct_stat_by_fid(hcp->ct_priv, &hai->hai_fid, &hcp->stat); + if (hai->hai_action == HSMA_ARCHIVE) { + fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid, + O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK); + if (fd < 0) { + rc = fd; + goto err_out; + } + + hcp->source_fd = fd; + } else if (hai->hai_action == HSMA_RESTORE) { + rc = ct_md_getattr(hcp->ct_priv, &hai->hai_fid, &hcp->statx); if (rc < 0) goto err_out; @@ -1027,6 +1136,40 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp, restore_open_flags); if (rc < 0) goto err_out; + } else if (hai->hai_action == HSMA_REMOVE) { + /* Since remove is atomic there is no need to send an + * initial MDS_HSM_PROGRESS RPC. + * RW-PCC uses Lustre HSM mechanism for data synchronization. + * At the beginning of RW-PCC attach, the client tries to + * exclusively open the file by using a lease lock. A + * successful lease open ensures that the current attach + * process is the unique opener for the file. + * After taking the lease, the file data is then copied from + * OSTs into PCC and then the client closes the lease with + * a PCC attach intent. 
+ * However, for a file with HSM exists, archived state (i.e. a + * cached file just was detached from PCC and restore into + * OST), a HSM REMOVE request may delete the above PCC copy + * during RW-PCC attach wrongly. + * Thus, a open/close on the corresponding Lustre file is added + * for HSMA_REMOVE here to solve this conflict. + */ + fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_fid, + O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK); + if (fd < 0) { + rc = fd; + /* ignore the error in case of Remove Archive on Last + * Unlink (RAoLU). + */ + if (rc == -ENOENT) { + rc = 0; + goto out_log; + } + goto err_out; + } + + hcp->source_fd = fd; + goto out_log; } rc = ioctl(ct->mnt_fd, LL_IOC_HSM_COPY_START, &hcp->copy); @@ -1035,6 +1178,7 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp, goto err_out; } +out_log: llapi_hsm_log_ct_progress(&hcp, hai, CT_START, 0, 0); ok_out: @@ -1043,6 +1187,9 @@ ok_out: return 0; err_out: + if (!(hcp->source_fd < 0)) + close(hcp->source_fd); + if (!(hcp->data_fd < 0)) close(hcp->data_fd); @@ -1079,19 +1226,23 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp, hai = &hcp->copy.hc_hai; if (hai->hai_action == HSMA_RESTORE && errval == 0) { - struct timeval tv[2]; - - /* Set {a,m}time of volatile file to that of original. */ - tv[0].tv_sec = hcp->stat.st_atime; - tv[0].tv_usec = 0; - tv[1].tv_sec = hcp->stat.st_mtime; - tv[1].tv_usec = 0; - if (futimes(hcp->data_fd, tv) < 0) { + struct ll_futimes_3 lfu = { + .lfu_atime_sec = hcp->statx.stx_atime.tv_sec, + .lfu_atime_nsec = hcp->statx.stx_atime.tv_nsec, + .lfu_mtime_sec = hcp->statx.stx_mtime.tv_sec, + .lfu_mtime_nsec = hcp->statx.stx_mtime.tv_nsec, + .lfu_ctime_sec = hcp->statx.stx_ctime.tv_sec, + .lfu_ctime_nsec = hcp->statx.stx_ctime.tv_nsec, + }; + + rc = fsync(hcp->data_fd); + if (rc < 0) { errval = -errno; goto end; } - rc = fsync(hcp->data_fd); + /* Set {a,m,c}time of volatile file to that of original. 
*/ + rc = ioctl(hcp->data_fd, LL_IOC_FUTIMES_3, &lfu); if (rc < 0) { errval = -errno; goto end; @@ -1120,6 +1271,9 @@ end: llapi_hsm_log_ct_progress(&hcp, hai, CT_FINISH, 0, 0); err_cleanup: + if (!(hcp->source_fd < 0)) + close(hcp->source_fd); + if (!(hcp->data_fd < 0)) close(hcp->data_fd); @@ -1174,7 +1328,7 @@ int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp, * @return error code if the action is not a copy operation. */ int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp, - lustre_fid *fid) + struct lu_fid *fid) { const struct hsm_action_item *hai = &hcp->copy.hc_hai; @@ -1199,17 +1353,20 @@ int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp, int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp) { const struct hsm_action_item *hai = &hcp->copy.hc_hai; + int fd; if (hcp->magic != CP_PRIV_MAGIC) return -EINVAL; - if (hai->hai_action == HSMA_ARCHIVE) - return ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid, - O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK); - else if (hai->hai_action == HSMA_RESTORE) - return dup(hcp->data_fd); - else + if (hai->hai_action == HSMA_ARCHIVE) { + fd = dup(hcp->source_fd); + return fd < 0 ? -errno : fd; + } else if (hai->hai_action == HSMA_RESTORE) { + fd = dup(hcp->data_fd); + return fd < 0 ? 
-errno : fd; + } else { return -EINVAL; + } } /** @@ -1228,7 +1385,7 @@ int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp) int llapi_hsm_import(const char *dst, int archive, const struct stat *st, unsigned long long stripe_size, int stripe_offset, int stripe_count, int stripe_pattern, char *pool_name, - lustre_fid *newfid) + struct lu_fid *newfid) { struct hsm_user_import hui; int fd; @@ -1243,9 +1400,9 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st, stripe_pattern | LOV_PATTERN_F_RELEASED, pool_name); if (fd < 0) { - llapi_error(LLAPI_MSG_ERROR, -errno, + llapi_error(LLAPI_MSG_ERROR, fd, "cannot create '%s' for import", dst); - return -errno; + return fd; } /* Get the new fid in Lustre. Caller needs to use this fid @@ -1268,8 +1425,8 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st, hui.hui_mtime_ns = st->st_mtim.tv_nsec; rc = ioctl(fd, LL_IOC_HSM_IMPORT, &hui); if (rc != 0) { - llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst); rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst); goto out_unlink; }