* (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
* alternatives
*
+ * Copyright (c) 2013, 2017, Intel Corporation.
+ *
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser General Public License
* (LGPL) version 2.1 or (at your discretion) any later version.
* Author: Henri Doreau <henri.doreau@cea.fr>
*/
+#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <dirent.h>
#include <stdarg.h>
#include <sys/stat.h>
+#include <sys/time.h>
#include <sys/types.h>
+#include <time.h>
#include <utime.h>
#include <sys/syscall.h>
#include <fnmatch.h>
-#include <glob.h>
#include <signal.h>
#ifdef HAVE_LINUX_UNISTD_H
#include <linux/unistd.h>
#include <unistd.h>
#endif
-#include <libcfs/libcfs.h>
-#include <lnet/lnetctl.h>
-#include <lustre/lustre_idl.h>
+#include <linux/lnet/lnetctl.h>
#include <lustre/lustreapi.h>
#include "lustreapi_internal.h"
/****** HSM Copytool API ********/
#define CT_PRIV_MAGIC 0xC0BE2001
struct hsm_copytool_private {
- int magic;
- char *mnt;
- struct kuc_hdr *kuch;
- int mnt_fd;
- int open_by_fid_fd;
- lustre_kernelcomm kuc;
- __u32 archives;
+ int magic; /* CT_PRIV_MAGIC once registered; checked before the handle is used */
+ char *mnt; /* strdup()ed copy of the mount point passed to register */
+ struct kuc_hdr *kuch; /* message buffer: a kuc_hdr followed by HAL_MAXSIZE bytes */
+ int mnt_fd; /* fd the LL_IOC_HSM_* ioctls are issued on; -1 until opened */
+ int open_by_fid_fd; /* fd against which FID names are resolved; -1 until opened */
+ struct lustre_kernelcomm *kuc; /* heap-allocated channel; lk_data[] holds the archive IDs */
};
#define CP_PRIV_MAGIC 0x19880429
struct hsm_copyaction_private {
__u32 magic;
+ __s32 source_fd; /* fd from ct_open_by_fid() for ARCHIVE/REMOVE, -1 when unset; signed so the "< 0" guards before close() work */
__s32 data_fd;
const struct hsm_copytool_private *ct_priv;
struct hsm_copy copy;
- struct stat stat;
+ lstatx_t statx; /* attributes fetched by ct_md_getattr() for RESTORE; source of owner and timestamps */
};
-#include <libcfs/libcfs.h>
-
enum ct_progress_type {
CT_START = 0,
CT_RUNNING = 50,
};
/* initialized in llapi_hsm_register_event_fifo() */
-int llapi_hsm_event_fd = -1;
+static int llapi_hsm_event_fd = -1;
+static bool created_hsm_event_fifo;
static inline const char *llapi_hsm_ct_ev2str(int type)
{
* \retval 0 on success.
* \retval -errno on error.
*/
-int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
+static int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
{
int rc;
char time_string[40];
* \retval 0 on success.
* \retval -errno on error.
*/
-int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
- __u32 event_type)
+static int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
+ __u32 event_type)
{
int rc;
char agent_uuid[UUID_MAX];
struct hsm_copytool_private *ct;
struct llapi_json_item_list *json_items;
+ /* Noop unless the event fd was initialized */
+ if (llapi_hsm_event_fd < 0)
+ return 0;
+
if (priv == NULL || *priv == NULL)
return -EINVAL;
goto err;
rc = llapi_json_add_item(&json_items, "archive", LLAPI_JSON_INTEGER,
- &ct->archives);
+ &ct->kuc->lk_data_count);
if (rc < 0)
goto err;
* \retval 0 on success.
* \retval -errno on error.
*/
-int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
- const struct hsm_action_item *hai, __u32 progress_type,
- __u64 total, __u64 current)
+static int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
+ const struct hsm_action_item *hai,
+ __u32 progress_type,
+ __u64 total, __u64 current)
{
int rc;
int linkno = 0;
struct hsm_copyaction_private *hcp;
struct llapi_json_item_list *json_items;
+ /* Noop unless the event fd was initialized */
+ if (llapi_hsm_event_fd < 0)
+ return 0;
+
if (phcp == NULL || *phcp == NULL)
return -EINVAL;
goto err;
}
- if (progress_type == CT_RUNNING)
+ if (progress_type == CT_RUNNING) {
rc = llapi_json_add_item(&json_items, "current_bytes",
LLAPI_JSON_BIGNUM, &current);
if (rc < 0)
goto err;
+ }
cancel:
rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
* \retval 0 on success.
* \retval -errno on error.
*/
-int llapi_hsm_register_event_fifo(char *path)
+int llapi_hsm_register_event_fifo(const char *path)
{
int read_fd;
struct stat statbuf;
+ struct sigaction ignore_action;
+ int rc;
/* Create the FIFO if necessary. */
if ((mkfifo(path, 0644) < 0) && (errno != EEXIST)) {
"not a pipe or has a wrong mode", path);
return -errno;
}
+ } else {
+ created_hsm_event_fifo = true;
}
/* Open the FIFO for read so that the subsequent open for write
/* Open the FIFO for writes, but don't block on waiting
* for a reader. */
llapi_hsm_event_fd = open(path, O_WRONLY | O_NONBLOCK);
- if (llapi_hsm_event_fd < 0) {
- llapi_error(LLAPI_MSG_ERROR, errno,
- "cannot open(%s) for write", path);
- return -errno;
- }
+ rc = -errno;
/* Now close the reader. An external monitoring process can
* now open the FIFO for reads. If no reader comes along the
* events are lost. NOTE: Only one reader at a time! */
close(read_fd);
+ if (llapi_hsm_event_fd < 0) {
+ llapi_error(LLAPI_MSG_ERROR, -rc,
+ "cannot open(%s) for write", path);
+ return rc;
+ }
+
/* Ignore SIGPIPEs -- can occur if the reader goes away. */
- signal(SIGPIPE, SIG_IGN);
+ memset(&ignore_action, 0, sizeof(ignore_action));
+ ignore_action.sa_handler = SIG_IGN;
+ sigemptyset(&ignore_action.sa_mask);
+ sigaction(SIGPIPE, &ignore_action, NULL);
return 0;
}
* \retval 0 on success.
* \retval -errno on error.
*/
-int llapi_hsm_unregister_event_fifo(char *path)
+int llapi_hsm_unregister_event_fifo(const char *path)
{
/* Noop unless the event fd was initialized */
if (llapi_hsm_event_fd < 0)
if (close(llapi_hsm_event_fd) < 0)
return -errno;
- unlink(path);
+ if (created_hsm_event_fifo) {
+ unlink(path);
+ created_hsm_event_fifo = false;
+ }
llapi_hsm_event_fd = -1;
out_free:
if (json_items != NULL)
llapi_json_destroy_list(&json_items);
-
- return;
}
/** Register a copytool
- * \param[out] priv Opaque private control structure
- * \param mnt Lustre filesystem mount point
- * \param flags Open flags, currently unused (e.g. O_NONBLOCK)
- * \param archive_count
- * \param archives Which archive numbers this copytool is responsible for
+ * \param[out] priv Opaque private control structure
+ * \param mnt Lustre filesystem mount point
+ * \param archive_count Number of valid archive IDs in \a archives
+ * \param archives Which archive numbers this copytool is
+ * responsible for
+ * \param rfd_flags flags applied to read fd of pipe
+ * (e.g. O_NONBLOCK)
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
*/
int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
- const char *mnt, int flags, int archive_count,
- int *archives)
+ const char *mnt, int archive_count,
+ int *archives, int rfd_flags)
{
struct hsm_copytool_private *ct;
int rc;
return -EINVAL;
}
+ for (rc = 0; rc < archive_count; rc++) {
+ /* in the list we have an all archive wildcard
+ * so move to all archives mode
+ */
+ if (archives[rc] == 0) {
+ archive_count = 0;
+ break;
+ }
+ }
+
ct = calloc(1, sizeof(*ct));
if (ct == NULL)
return -ENOMEM;
ct->magic = CT_PRIV_MAGIC;
ct->mnt_fd = -1;
ct->open_by_fid_fd = -1;
- ct->kuc.lk_rfd = LK_NOFD;
- ct->kuc.lk_wfd = LK_NOFD;
ct->mnt = strdup(mnt);
if (ct->mnt == NULL) {
goto out_err;
}
- ct->kuch = malloc(HAL_MAXSIZE + sizeof(*ct->kuch));
+ ct->kuch = calloc(1, HAL_MAXSIZE + sizeof(*ct->kuch));
if (ct->kuch == NULL) {
rc = -ENOMEM;
goto out_err;
goto out_err;
}
- /* no archives specified means "match all". */
- ct->archives = 0;
- for (rc = 0; rc < archive_count; rc++) {
- if (archives[rc] > 8 * sizeof(ct->archives)) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "maximum of %zu archives supported",
- 8 * sizeof(ct->archives));
- goto out_err;
- }
- /* in the list we have a all archive wildcard
- * so move to all archives mode
- */
- if (archives[rc] == 0) {
- ct->archives = 0;
- archive_count = 0;
- break;
- }
- ct->archives |= (1 << (archives[rc] - 1));
+ ct->kuc = malloc(sizeof(*ct) + archive_count * sizeof(__u32));
+ if (ct->kuc == NULL) {
+ rc = -ENOMEM;
+ goto out_err;
}
- rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM);
+ ct->kuc->lk_rfd = LK_NOFD;
+ ct->kuc->lk_wfd = LK_NOFD;
+
+ rc = libcfs_ukuc_start(ct->kuc, KUC_GRP_HSM, rfd_flags);
if (rc < 0)
- goto out_err;
+ goto out_free_kuc;
+
+ ct->kuc->lk_flags = LK_FLG_DATANR;
+ ct->kuc->lk_data_count = archive_count;
+ for (rc = 0; rc < archive_count; rc++) {
+ if (archives[rc] < 0) {
+ llapi_err_noerrno(LLAPI_MSG_ERROR, "%d requested when "
+ "archive id >= 0 is supported",
+ archives[rc]);
+ rc = -EINVAL;
+ goto out_kuc;
+ }
+
+ ct->kuc->lk_data[rc] = archives[rc];
+ }
- /* Storing archive(s) in lk_data; see mdc_ioc_hsm_ct_start */
- ct->kuc.lk_data = ct->archives;
- rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc);
+ rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc);
if (rc < 0) {
rc = -errno;
llapi_error(LLAPI_MSG_ERROR, rc,
"cannot start copytool on '%s'", mnt);
- goto out_err;
- } else {
- rc = 0;
+ goto out_kuc;
}
llapi_hsm_log_ct_registration(&ct, CT_REGISTER);
/* Only the kernel reference keeps the write side open */
- close(ct->kuc.lk_wfd);
- ct->kuc.lk_wfd = LK_NOFD;
- if (rc < 0)
- goto out_kuc;
-
+ close(ct->kuc->lk_wfd);
+ ct->kuc->lk_wfd = LK_NOFD;
*priv = ct;
+
return 0;
out_kuc:
/* cleanup the kuc channel */
- libcfs_ukuc_stop(&ct->kuc);
+ libcfs_ukuc_stop(ct->kuc);
+
+out_free_kuc:
+ free(ct->kuc);
out_err:
if (!(ct->mnt_fd < 0))
if (ct->magic != CT_PRIV_MAGIC)
return -EINVAL;
- /* Tell the kernel to stop sending us messages */
- ct->kuc.lk_flags = LK_FLG_STOP;
- ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc);
+ /* Close the read side of the KUC pipe. This should be done
+ * before unregistering to avoid a deadlock: an ldlm_cb thread
+ * enters libcfs_kkuc_group_put(), acquires kg_sem and blocks
+ * in pipe_write() because the pipe is full; we then attempt
+ * to unregister and block on kg_sem. */
+ libcfs_ukuc_stop(ct->kuc);
- /* Shut down the kernelcomms */
- libcfs_ukuc_stop(&ct->kuc);
+ /* Tell the kernel to stop sending us messages */
+ ct->kuc->lk_flags = LK_FLG_STOP;
+ ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc);
llapi_hsm_log_ct_registration(&ct, CT_UNREGISTER);
close(ct->mnt_fd);
free(ct->mnt);
free(ct->kuch);
+ free(ct->kuc);
free(ct);
*priv = NULL;
return 0;
}
+/** Hand back the descriptor a caller can poll/select on for HSM events.
+ * \param ct Opaque private control structure
+ * \retval -EINVAL if \a ct is NULL or does not carry CT_PRIV_MAGIC
+ * \retval otherwise the value of libcfs_ukuc_get_rfd() for the copytool's
+ *         kernel communication channel
+ */
+int llapi_hsm_copytool_get_fd(struct hsm_copytool_private *ct)
+{
+ /* Only a handle that is non-NULL and carries the magic is trusted. */
+ if (ct != NULL && ct->magic == CT_PRIV_MAGIC)
+ return libcfs_ukuc_get_rfd(ct->kuc);
+
+ return -EINVAL;
+}
+
/** Wait for the next hsm_action_list
* \param ct Opaque private control structure
* \param halh Action list handle, will be allocated here
kuch = ct->kuch;
- rc = libcfs_ukuc_msg_get(&ct->kuc, (char *)kuch,
+repeat:
+ rc = libcfs_ukuc_msg_get(ct->kuc, (char *)kuch,
HAL_MAXSIZE + sizeof(*kuch),
KUC_TRANSPORT_HSM);
if (rc < 0)
/* Check that we have registered for this archive #
* if 0 registered, we serve any archive */
- if (ct->archives &&
- ((1 << (hal->hal_archive_id - 1)) & ct->archives) == 0) {
- llapi_err_noerrno(LLAPI_MSG_INFO,
- "This copytool does not service archive #%d,"
- " ignoring this request."
- " Mask of served archive is 0x%.8X",
- hal->hal_archive_id, ct->archives);
- rc = -EAGAIN;
+ if (ct->kuc != NULL && ct->kuc->lk_data_count != 0) {
+ int i;
- goto out_err;
+ for (i = 0; i < ct->kuc->lk_data_count; i++) {
+ if (hal->hal_archive_id == ct->kuc->lk_data[i])
+ break;
+ }
+
+ if (i >= ct->kuc->lk_data_count)
+ goto repeat;
}
*halh = hal;
* \param parent_len Destination buffer size.
* \return 0 on success.
*/
-static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent,
+static int fid_parent(const char *mnt, const struct lu_fid *fid, char *parent,
size_t parent_len)
{
int rc;
return fd < 0 ? -errno : fd;
}
-static int ct_stat_by_fid(const struct hsm_copytool_private *ct,
- const struct lu_fid *fid,
- struct stat *buf)
+/**
+ * Get metadata attributes of file by FID.
+ *
+ * Use the IOC_MDC_GETFILEINFO ioctl (to send a MDS_GETATTR_NAME RPC)
+ * to get the attributes of the file identified by \a fid. This
+ * returns only the attributes stored on the MDT and avoids taking
+ * layout locks or accessing OST objects. It also bypasses the inode
+ * cache. Attributes are returned in \a stx.
+ *
+ * The FID is formatted with DFID_NOBRACE and that name is looked up
+ * through get_lmd_info_fd() using \a ct's open_by_fid_fd. The temporary
+ * lov_user_mds_data buffer is sized for the larger of a V3 layout with
+ * LOV_MAX_STRIPE_COUNT stripes and XATTR_SIZE_MAX, and is always freed
+ * before returning.
+ *
+ * \param ct   copytool private data; only open_by_fid_fd is read
+ * \param fid  FID of the file whose attributes are wanted
+ * \param stx  receives a copy of lmd_stx on success; not written on failure
+ *
+ * \retval 0        on success
+ * \retval -EINVAL  if the formatted FID is empty or does not fit the buffer
+ * \retval -ENOMEM  if the temporary buffer cannot be allocated
+ * \retval other    the non-zero value returned by get_lmd_info_fd(), or the
+ *                  negative value returned by snprintf(), passed through
+ */
+static int ct_md_getattr(const struct hsm_copytool_private *ct,
+ const struct lu_fid *fid,
+ lstatx_t *stx)
{
- char fid_name[FID_NOBRACE_LEN + 1];
+ struct lov_user_mds_data *lmd;
+ char fname[FID_NOBRACE_LEN + 1] = "";
+ size_t lmd_size;
int rc;
- snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid));
+ rc = snprintf(fname, sizeof(fname), DFID_NOBRACE, PFID(fid));
+ if (rc < 0)
+ return rc; /* raw snprintf() result, not an errno value */
+ if (rc >= sizeof(fname) || rc == 0) /* truncated or empty name */
+ return -EINVAL;
+
+ lmd_size = offsetof(typeof(*lmd), lmd_lmm) +
+ lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
+
+ if (lmd_size < offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX)
+ lmd_size = offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX; /* never below header + XATTR_SIZE_MAX */
- rc = fstatat(ct->open_by_fid_fd, fid_name, buf, 0);
- return rc ? -errno : 0;
+ lmd = malloc(lmd_size);
+ if (lmd == NULL)
+ return -ENOMEM;
+
+ rc = get_lmd_info_fd(fname, ct->open_by_fid_fd, -1,
+ lmd, lmd_size, GET_LMD_INFO);
+ if (rc)
+ goto out; /* leave *stx untouched on failure */
+
+ *stx = lmd->lmd_stx;
+out:
+ free(lmd);
+
+ return rc;
}
/** Create the destination volatile file for a restore operation.
if (fd < 0)
return fd;
- rc = fchown(fd, hcp->stat.st_uid, hcp->stat.st_gid);
+ rc = fchown(fd, hcp->statx.stx_uid, hcp->statx.stx_gid);
if (rc < 0)
goto err_cleanup;
int restore_mdt_index, int restore_open_flags,
bool is_error)
{
- struct hsm_copyaction_private *hcp;
- int rc;
+ struct hsm_copyaction_private *hcp;
+ int fd;
+ int rc;
hcp = calloc(1, sizeof(*hcp));
if (hcp == NULL)
return -ENOMEM;
+ hcp->source_fd = -1;
hcp->data_fd = -1;
hcp->ct_priv = ct;
hcp->copy.hc_hai = *hai;
if (is_error)
goto ok_out;
- if (hai->hai_action == HSMA_RESTORE) {
- rc = ct_stat_by_fid(hcp->ct_priv, &hai->hai_fid, &hcp->stat);
+ if (hai->hai_action == HSMA_ARCHIVE) {
+ fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid,
+ O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
+ if (fd < 0) {
+ rc = fd;
+ goto err_out;
+ }
+
+ hcp->source_fd = fd;
+ } else if (hai->hai_action == HSMA_RESTORE) {
+ rc = ct_md_getattr(hcp->ct_priv, &hai->hai_fid, &hcp->statx);
if (rc < 0)
goto err_out;
restore_open_flags);
if (rc < 0)
goto err_out;
+ } else if (hai->hai_action == HSMA_REMOVE) {
+ /* Since remove is atomic there is no need to send an
+ * initial MDS_HSM_PROGRESS RPC.
+ * RW-PCC uses Lustre HSM mechanism for data synchronization.
+ * At the beginning of RW-PCC attach, the client tries to
+ * exclusively open the file by using a lease lock. A
+ * successful lease open ensures that the current attach
+ * process is the unique opener for the file.
+ * After taking the lease, the file data is then copied from
+ * OSTs into PCC and then the client closes the lease
+ * with a PCC attach intent.
+ * However, for a file in the HSM "exists, archived" state (i.e.
+ * a cached file that was just detached from PCC and restored
+ * onto the OSTs), an HSM REMOVE request may wrongly delete the
+ * above PCC copy during RW-PCC attach.
+ * Thus, an open/close on the corresponding Lustre file is added
+ * for HSMA_REMOVE here to resolve this conflict.
+ */
+ fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_fid,
+ O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
+ if (fd < 0) {
+ rc = fd;
+ /* ignore the error in case of Remove Archive on Last
+ * Unlink (RAoLU).
+ */
+ if (rc == -ENOENT) {
+ rc = 0;
+ goto out_log;
+ }
+ goto err_out;
+ }
+
+ hcp->source_fd = fd;
+ goto out_log;
}
rc = ioctl(ct->mnt_fd, LL_IOC_HSM_COPY_START, &hcp->copy);
goto err_out;
}
+out_log:
llapi_hsm_log_ct_progress(&hcp, hai, CT_START, 0, 0);
ok_out:
return 0;
err_out:
+ if (!(hcp->source_fd < 0))
+ close(hcp->source_fd);
+
if (!(hcp->data_fd < 0))
close(hcp->data_fd);
hai = &hcp->copy.hc_hai;
if (hai->hai_action == HSMA_RESTORE && errval == 0) {
- struct timeval tv[2];
-
- /* Set {a,m}time of volatile file to that of original. */
- tv[0].tv_sec = hcp->stat.st_atime;
- tv[0].tv_usec = 0;
- tv[1].tv_sec = hcp->stat.st_mtime;
- tv[1].tv_usec = 0;
- if (futimes(hcp->data_fd, tv) < 0) {
+ struct ll_futimes_3 lfu = {
+ .lfu_atime_sec = hcp->statx.stx_atime.tv_sec,
+ .lfu_atime_nsec = hcp->statx.stx_atime.tv_nsec,
+ .lfu_mtime_sec = hcp->statx.stx_mtime.tv_sec,
+ .lfu_mtime_nsec = hcp->statx.stx_mtime.tv_nsec,
+ .lfu_ctime_sec = hcp->statx.stx_ctime.tv_sec,
+ .lfu_ctime_nsec = hcp->statx.stx_ctime.tv_nsec,
+ };
+
+ rc = fsync(hcp->data_fd);
+ if (rc < 0) {
errval = -errno;
goto end;
}
- rc = fsync(hcp->data_fd);
+ /* Set {a,m,c}time of volatile file to that of original. */
+ rc = ioctl(hcp->data_fd, LL_IOC_FUTIMES_3, &lfu);
if (rc < 0) {
errval = -errno;
goto end;
llapi_hsm_log_ct_progress(&hcp, hai, CT_FINISH, 0, 0);
err_cleanup:
+ if (!(hcp->source_fd < 0))
+ close(hcp->source_fd);
+
if (!(hcp->data_fd < 0))
close(hcp->data_fd);
* @return error code if the action is not a copy operation.
*/
int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
- lustre_fid *fid)
+ struct lu_fid *fid)
{
const struct hsm_action_item *hai = &hcp->copy.hc_hai;
return -EINVAL;
if (hai->hai_action == HSMA_ARCHIVE) {
- return ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid,
- O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
+ fd = dup(hcp->source_fd);
+ return fd < 0 ? -errno : fd;
} else if (hai->hai_action == HSMA_RESTORE) {
fd = dup(hcp->data_fd);
return fd < 0 ? -errno : fd;
int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
unsigned long long stripe_size, int stripe_offset,
int stripe_count, int stripe_pattern, char *pool_name,
- lustre_fid *newfid)
+ struct lu_fid *newfid)
{
struct hsm_user_import hui;
int fd;