Whamcloud - gitweb
LU-14398 hsm: use llapi_fid2path_at() in the copytool
[fs/lustre-release.git] / lustre / utils / liblustreapi_hsm.c
index 738c394..ba99cd3 100644 (file)
@@ -6,6 +6,8 @@
  * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
  *     alternatives
  *
+ * Copyright (c) 2013, 2017, Intel Corporation.
+ *
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the GNU Lesser General Public License
  * (LGPL) version 2.1 or (at your discretion) any later version.
@@ -30,6 +32,7 @@
  * Author: Henri Doreau <henri.doreau@cea.fr>
  */
 
+#include <fcntl.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <dirent.h>
 #include <stdarg.h>
 #include <sys/stat.h>
+#include <sys/time.h>
 #include <sys/types.h>
+#include <time.h>
 #include <utime.h>
 #include <sys/syscall.h>
 #include <fnmatch.h>
-#include <glob.h>
 #include <signal.h>
 #ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
@@ -53,9 +57,7 @@
 #include <unistd.h>
 #endif
 
-#include <liblustre.h>
-#include <lnet/lnetctl.h>
-#include <obd.h>
+#include <linux/lnet/lnetctl.h>
 #include <lustre/lustreapi.h>
 #include "lustreapi_internal.h"
 
 /****** HSM Copytool API ********/
 #define CT_PRIV_MAGIC 0xC0BE2001
 struct hsm_copytool_private {
-       int                      magic;
-       char                    *mnt;
-       int                      mnt_fd;
-       int                      open_by_fid_fd;
-       lustre_kernelcomm        kuc;
-       __u32                    archives;
+       int                              magic; /* CT_PRIV_MAGIC while valid */
+       char                            *mnt; /* strdup() of the mount point */
+       struct kuc_hdr                  *kuch; /* recv buffer, reused per call */
+       int                              mnt_fd; /* open(mnt), ioctl target */
+       int                              open_by_fid_fd; /* dirfd for openat() by FID */
+       struct lustre_kernelcomm        *kuc; /* lk_data[] = served archive IDs */
 };
 
 #define CP_PRIV_MAGIC 0x19880429
 struct hsm_copyaction_private {
        __u32                                    magic;
+       __u32                                    source_fd;
        __s32                                    data_fd;
        const struct hsm_copytool_private       *ct_priv;
        struct hsm_copy                          copy;
-       struct stat                              stat;
+       lstatx_t                                 statx;
 };
 
-#include <libcfs/libcfs.h>
-
 enum ct_progress_type {
        CT_START        = 0,
        CT_RUNNING      = 50,
@@ -113,7 +114,8 @@ enum ct_event {
 };
 
 /* initialized in llapi_hsm_register_event_fifo() */
-int llapi_hsm_event_fd = -1;
+static int llapi_hsm_event_fd = -1;
+static bool created_hsm_event_fifo;
 
 static inline const char *llapi_hsm_ct_ev2str(int type)
 {
@@ -169,7 +171,7 @@ static inline const char *llapi_hsm_ct_ev2str(int type)
  * \retval 0 on success.
  * \retval -errno on error.
  */
-int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
+static int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
 {
        int                             rc;
        char                            time_string[40];
@@ -237,14 +239,18 @@ int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
  * \retval 0 on success.
  * \retval -errno on error.
  */
-int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
-                                 __u32 event_type)
+static int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
+                                        __u32 event_type)
 {
        int                             rc;
        char                            agent_uuid[UUID_MAX];
        struct hsm_copytool_private     *ct;
        struct llapi_json_item_list     *json_items;
 
+       /* Noop unless the event fd was initialized */
+       if (llapi_hsm_event_fd < 0)
+               return 0;
+
        if (priv == NULL || *priv == NULL)
                return -EINVAL;
 
@@ -275,7 +281,7 @@ int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
                goto err;
 
        rc = llapi_json_add_item(&json_items, "archive", LLAPI_JSON_INTEGER,
-                                &ct->archives);
+                                &ct->kuc->lk_data_count);
        if (rc < 0)
                goto err;
 
@@ -342,9 +348,10 @@ out_free:
  * \retval 0 on success.
  * \retval -errno on error.
  */
-int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
-                   const struct hsm_action_item *hai, __u32 progress_type,
-                   __u64 total, __u64 current)
+static int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
+                                    const struct hsm_action_item *hai,
+                                    __u32 progress_type,
+                                    __u64 total, __u64 current)
 {
        int                             rc;
        int                             linkno = 0;
@@ -354,6 +361,10 @@ int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
        struct hsm_copyaction_private   *hcp;
        struct llapi_json_item_list     *json_items;
 
+       /* Noop unless the event fd was initialized */
+       if (llapi_hsm_event_fd < 0)
+               return 0;
+
        if (phcp == NULL || *phcp == NULL)
                return -EINVAL;
 
@@ -401,8 +412,9 @@ int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
        /* lustre_path isn't available after a restore completes */
        /* total_bytes isn't available after a restore or archive completes */
        if (progress_type != CT_FINISH) {
-               rc = llapi_fid2path(hcp->ct_priv->mnt, strfid, lustre_path,
-                                   sizeof(lustre_path), &recno, &linkno);
+               rc = llapi_fid2path_at(hcp->ct_priv->mnt_fd, &hai->hai_dfid,
+                                      lustre_path, sizeof(lustre_path),
+                                      &recno, &linkno);
                if (rc < 0)
                        goto err;
 
@@ -417,11 +429,12 @@ int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
                        goto err;
        }
 
-       if (progress_type == CT_RUNNING)
+       if (progress_type == CT_RUNNING) {
                rc = llapi_json_add_item(&json_items, "current_bytes",
                                         LLAPI_JSON_BIGNUM, &current);
                if (rc < 0)
                        goto err;
+       }
 
 cancel:
        rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
@@ -458,10 +471,12 @@ out_free:
  * \retval 0 on success.
  * \retval -errno on error.
  */
-int llapi_hsm_register_event_fifo(char *path)
+int llapi_hsm_register_event_fifo(const char *path)
 {
        int read_fd;
        struct stat statbuf;
+       struct sigaction ignore_action;
+       int rc;
 
        /* Create the FIFO if necessary. */
        if ((mkfifo(path, 0644) < 0) && (errno != EEXIST)) {
@@ -480,6 +495,8 @@ int llapi_hsm_register_event_fifo(char *path)
                                    "not a pipe or has a wrong mode", path);
                        return -errno;
                }
+       } else {
+               created_hsm_event_fifo = true;
        }
 
        /* Open the FIFO for read so that the subsequent open for write
@@ -494,19 +511,24 @@ int llapi_hsm_register_event_fifo(char *path)
        /* Open the FIFO for writes, but don't block on waiting
         * for a reader. */
        llapi_hsm_event_fd = open(path, O_WRONLY | O_NONBLOCK);
-       if (llapi_hsm_event_fd < 0) {
-               llapi_error(LLAPI_MSG_ERROR, errno,
-                           "cannot open(%s) for write", path);
-               return -errno;
-       }
+       rc = -errno;
 
        /* Now close the reader. An external monitoring process can
         * now open the FIFO for reads. If no reader comes along the
         * events are lost. NOTE: Only one reader at a time! */
        close(read_fd);
 
+       if (llapi_hsm_event_fd < 0) {
+               llapi_error(LLAPI_MSG_ERROR, -rc,
+                           "cannot open(%s) for write", path);
+               return rc;
+       }
+
        /* Ignore SIGPIPEs -- can occur if the reader goes away. */
-       signal(SIGPIPE, SIG_IGN);
+       memset(&ignore_action, 0, sizeof(ignore_action));
+       ignore_action.sa_handler = SIG_IGN;
+       sigemptyset(&ignore_action.sa_mask);
+       sigaction(SIGPIPE, &ignore_action, NULL);
 
        return 0;
 }
@@ -519,7 +541,7 @@ int llapi_hsm_register_event_fifo(char *path)
  * \retval 0 on success.
  * \retval -errno on error.
  */
-int llapi_hsm_unregister_event_fifo(char *path)
+int llapi_hsm_unregister_event_fifo(const char *path)
 {
        /* Noop unless the event fd was initialized */
        if (llapi_hsm_event_fd < 0)
@@ -528,7 +550,10 @@ int llapi_hsm_unregister_event_fifo(char *path)
        if (close(llapi_hsm_event_fd) < 0)
                return -errno;
 
-       unlink(path);
+       if (created_hsm_event_fifo) {
+               unlink(path);
+               created_hsm_event_fifo = false;
+       }
 
        llapi_hsm_event_fd = -1;
 
@@ -633,20 +658,23 @@ err:
 out_free:
        if (json_items != NULL)
                llapi_json_destroy_list(&json_items);
-
-       return;
 }
 
 /** Register a copytool
- * \param[out] priv Opaque private control structure
- * \param mnt Lustre filesystem mount point
- * \param flags Open flags, currently unused (e.g. O_NONBLOCK)
- * \param archive_count
- * \param archives Which archive numbers this copytool is responsible for
+ * \param[out] priv            Opaque private control structure
+ * \param mnt                  Lustre filesystem mount point
+ * \param archive_count                Number of valid archive IDs in \a archives
+ * \param archives             Which archive numbers this copytool is
+ *                             responsible for
+ * \param rfd_flags            flags applied to read fd of pipe
+ *                             (e.g. O_NONBLOCK)
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
  */
 int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
-                               const char *mnt, int flags, int archive_count,
-                               int *archives)
+                               const char *mnt, int archive_count,
+                               int *archives, int rfd_flags)
 {
        struct hsm_copytool_private     *ct;
        int                              rc;
@@ -657,6 +685,16 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
                return -EINVAL;
        }
 
+       for (rc = 0; rc < archive_count; rc++) {
+               /* in the list we have an all archive wildcard
+                * so move to all archives mode
+                */
+               if (archives[rc] == 0) {
+                       archive_count = 0;
+                       break;
+               }
+       }
+
        ct = calloc(1, sizeof(*ct));
        if (ct == NULL)
                return -ENOMEM;
@@ -664,8 +702,6 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
        ct->magic = CT_PRIV_MAGIC;
        ct->mnt_fd = -1;
        ct->open_by_fid_fd = -1;
-       ct->kuc.lk_rfd = LK_NOFD;
-       ct->kuc.lk_wfd = LK_NOFD;
 
        ct->mnt = strdup(mnt);
        if (ct->mnt == NULL) {
@@ -673,6 +709,12 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
                goto out_err;
        }
 
+       ct->kuch = calloc(1, HAL_MAXSIZE + sizeof(*ct->kuch));
+       if (ct->kuch == NULL) {
+               rc = -ENOMEM;
+               goto out_err;
+       }
+
        ct->mnt_fd = open(ct->mnt, O_RDONLY);
        if (ct->mnt_fd < 0) {
                rc = -errno;
@@ -685,56 +727,56 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
                goto out_err;
        }
 
-       /* no archives specified means "match all". */
-       ct->archives = 0;
-       for (rc = 0; rc < archive_count; rc++) {
-               if (archives[rc] > 8 * sizeof(ct->archives)) {
-                       llapi_err_noerrno(LLAPI_MSG_ERROR,
-                                         "maximum of %zu archives supported",
-                                         8 * sizeof(ct->archives));
-                       goto out_err;
-               }
-               /* in the list we have a all archive wildcard
-                * so move to all archives mode
-                */
-               if (archives[rc] == 0) {
-                       ct->archives = 0;
-                       archive_count = 0;
-                       break;
-               }
-               ct->archives |= (1 << (archives[rc] - 1));
+       /* Size the buffer from the object being allocated (the kernel
+        * comm descriptor) plus one __u32 slot per archive ID. */
+       ct->kuc = malloc(sizeof(*ct->kuc) + archive_count * sizeof(__u32));
+       if (ct->kuc == NULL) {
+               rc = -ENOMEM;
+               goto out_err;
         }
 
-       rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM);
+       ct->kuc->lk_rfd = LK_NOFD;
+       ct->kuc->lk_wfd = LK_NOFD;
+
+       rc = libcfs_ukuc_start(ct->kuc, KUC_GRP_HSM, rfd_flags);
        if (rc < 0)
-               goto out_err;
+               goto out_free_kuc;
 
-       /* Storing archive(s) in lk_data; see mdc_ioc_hsm_ct_start */
-       ct->kuc.lk_data = ct->archives;
-       rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc);
+       ct->kuc->lk_flags = LK_FLG_DATANR;
+       ct->kuc->lk_data_count = archive_count;
+       for (rc = 0; rc < archive_count; rc++) {
+               if (archives[rc] < 0) {
+                       llapi_err_noerrno(LLAPI_MSG_ERROR, "%d requested when "
+                                         "archive id >= 0 is supported",
+                                         archives[rc]);
+                       rc = -EINVAL;
+                       goto out_kuc;
+               }
+
+               ct->kuc->lk_data[rc] = archives[rc];
+       }
+
+       rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc);
        if (rc < 0) {
                rc = -errno;
                llapi_error(LLAPI_MSG_ERROR, rc,
                            "cannot start copytool on '%s'", mnt);
-               goto out_err;
-       } else {
-               rc = 0;
+               goto out_kuc;
        }
 
        llapi_hsm_log_ct_registration(&ct, CT_REGISTER);
 
        /* Only the kernel reference keeps the write side open */
-       close(ct->kuc.lk_wfd);
-       ct->kuc.lk_wfd = LK_NOFD;
-       if (rc < 0)
-               goto out_kuc;
-
+       close(ct->kuc->lk_wfd);
+       ct->kuc->lk_wfd = LK_NOFD;
        *priv = ct;
+
        return 0;
 
 out_kuc:
        /* cleanup the kuc channel */
-       libcfs_ukuc_stop(&ct->kuc);
+       libcfs_ukuc_stop(ct->kuc);
+
+out_free_kuc:
+       free(ct->kuc);
 
 out_err:
        if (!(ct->mnt_fd < 0))
@@ -743,8 +785,9 @@ out_err:
        if (!(ct->open_by_fid_fd < 0))
                close(ct->open_by_fid_fd);
 
-       if (ct->mnt != NULL)
-               free(ct->mnt);
+       free(ct->mnt);
+
+       free(ct->kuch);
 
        free(ct);
 
@@ -767,30 +810,51 @@ int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv)
        if (ct->magic != CT_PRIV_MAGIC)
                return -EINVAL;
 
-       /* Tell the kernel to stop sending us messages */
-       ct->kuc.lk_flags = LK_FLG_STOP;
-       ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc);
+       /* Close the read side of the KUC pipe. This should be done
+        * before unregistering to avoid a deadlock: an ldlm_cb thread
+        * enters libcfs_kkuc_group_put(), acquires kg_sem and blocks
+        * in pipe_write() because the pipe is full; we then attempt
+        * to unregister and block on kg_sem. */
+       libcfs_ukuc_stop(ct->kuc);
 
-       /* Shut down the kernelcomms */
-       libcfs_ukuc_stop(&ct->kuc);
+       /* Tell the kernel to stop sending us messages */
+       ct->kuc->lk_flags = LK_FLG_STOP;
+       ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc);
 
        llapi_hsm_log_ct_registration(&ct, CT_UNREGISTER);
 
        close(ct->open_by_fid_fd);
        close(ct->mnt_fd);
        free(ct->mnt);
+       free(ct->kuch);
+       free(ct->kuc);
        free(ct);
        *priv = NULL;
 
        return 0;
 }
 
+/** Get the descriptor on which HSM events from the kernel can be read,
+ * suitable for poll/select.
+ * \param ct Opaque private control structure
+ * \retval -EINVAL if \a ct is NULL or does not carry CT_PRIV_MAGIC
+ * \retval otherwise the value returned by libcfs_ukuc_get_rfd()
+ */
+int llapi_hsm_copytool_get_fd(struct hsm_copytool_private *ct)
+{
+       if (ct != NULL && ct->magic == CT_PRIV_MAGIC)
+               return libcfs_ukuc_get_rfd(ct->kuc);
+
+       return -EINVAL;
+}
+
 /** Wait for the next hsm_action_list
  * \param ct Opaque private control structure
  * \param halh Action list handle, will be allocated here
  * \param msgsize Number of bytes in the message, will be set here
  * \return 0 valid message received; halh and msgsize are set
  *        <0 error code
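+ * Note: The returned action list lives in the receive buffer ct->kuch, which
+ * is reused by every call. The application must not call
+ * llapi_hsm_copytool_recv() again until it has finished with the data from
+ * the previous call.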
  */
 int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
                            struct hsm_action_list **halh, int *msgsize)
@@ -805,21 +869,20 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
        if (halh == NULL || msgsize == NULL)
                return -EINVAL;
 
-       kuch = malloc(HAL_MAXSIZE + sizeof(*kuch));
-       if (kuch == NULL)
-               return -ENOMEM;
+       kuch = ct->kuch;
 
-       rc = libcfs_ukuc_msg_get(&ct->kuc, (char *)kuch,
+repeat:
+       rc = libcfs_ukuc_msg_get(ct->kuc, (char *)kuch,
                                 HAL_MAXSIZE + sizeof(*kuch),
                                 KUC_TRANSPORT_HSM);
        if (rc < 0)
-               goto out_free;
+               goto out_err;
 
        /* Handle generic messages */
        if (kuch->kuc_transport == KUC_TRANSPORT_GENERIC &&
            kuch->kuc_msgtype == KUC_MSG_SHUTDOWN) {
                rc = -ESHUTDOWN;
-               goto out_free;
+               goto out_err;
        }
 
        if (kuch->kuc_transport != KUC_TRANSPORT_HSM ||
@@ -828,14 +891,14 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
                                  "Unknown HSM message type %d:%d\n",
                                  kuch->kuc_transport, kuch->kuc_msgtype);
                rc = -EPROTO;
-               goto out_free;
+               goto out_err;
        }
 
        if (kuch->kuc_msglen < sizeof(*kuch) + sizeof(*hal)) {
                llapi_err_noerrno(LLAPI_MSG_ERROR, "Short HSM message %d",
                                  kuch->kuc_msglen);
                rc = -EPROTO;
-               goto out_free;
+               goto out_err;
        }
 
        /* Our message is a hsm_action_list. Use pointer math to skip
@@ -845,36 +908,28 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
 
        /* Check that we have registered for this archive #
         * if 0 registered, we serve any archive */
-       if (ct->archives &&
-           ((1 << (hal->hal_archive_id - 1)) & ct->archives) == 0) {
-               llapi_err_noerrno(LLAPI_MSG_INFO,
-                                 "This copytool does not service archive #%d,"
-                                 " ignoring this request."
-                                 " Mask of served archive is 0x%.8X",
-                                 hal->hal_archive_id, ct->archives);
-               rc = -EAGAIN;
-
-               goto out_free;
+       if (ct->kuc != NULL && ct->kuc->lk_data_count != 0) {
+               int i;
+
+               for (i = 0; i < ct->kuc->lk_data_count; i++) {
+                       if (hal->hal_archive_id == ct->kuc->lk_data[i])
+                               break;
+               }
+
+               if (i >= ct->kuc->lk_data_count)
+                       goto repeat;
        }
 
        *halh = hal;
        *msgsize = kuch->kuc_msglen - sizeof(*kuch);
        return 0;
 
-out_free:
+out_err:
        *halh = NULL;
        *msgsize = 0;
-       free(kuch);
        return rc;
 }
 
-/** Release the action list when done with it. */
-void llapi_hsm_action_list_free(struct hsm_action_list **hal)
-{
-       /* Reuse the llapi_changelog_free function */
-       llapi_changelog_free((struct changelog_ext_rec **)hal);
-}
-
 /** Get parent path from mount point and fid.
  *
  * \param mnt        Filesystem root path.
@@ -883,25 +938,22 @@ void llapi_hsm_action_list_free(struct hsm_action_list **hal)
  * \param parent_len Destination buffer size.
  * \return 0 on success.
  */
-static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent,
-                     size_t parent_len)
+static int fid_parent(const struct hsm_copytool_private *ct,
+                     const struct lu_fid *fid, char *parent, size_t parent_len)
 {
        int              rc;
        int              linkno = 0;
        long long        recno = -1;
        char             file[PATH_MAX];
-       char             strfid[FID_NOBRACE_LEN + 1];
        char            *ptr;
 
-       snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(fid));
-
-       rc = llapi_fid2path(mnt, strfid, file, sizeof(file),
-                           &recno, &linkno);
+       rc = llapi_fid2path_at(ct->mnt_fd, fid, file, sizeof(file),
+                              &recno, &linkno);
        if (rc < 0)
                return rc;
 
        /* fid2path returns a relative path */
-       rc = snprintf(parent, parent_len, "%s/%s", mnt, file);
+       rc = snprintf(parent, parent_len, "%s/%s", ct->mnt, file);
        if (rc >= parent_len)
                return -ENAMETOOLONG;
 
@@ -921,21 +973,58 @@ static int ct_open_by_fid(const struct hsm_copytool_private *ct,
                          const struct lu_fid *fid, int open_flags)
 {
        char fid_name[FID_NOBRACE_LEN + 1];
+       int fd;
 
        snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid));
 
-       return openat(ct->open_by_fid_fd, fid_name, open_flags);
+       fd = openat(ct->open_by_fid_fd, fid_name, open_flags);
+       return fd < 0 ? -errno : fd;
 }
 
-static int ct_stat_by_fid(const struct hsm_copytool_private *ct,
-                         const struct lu_fid *fid,
-                         struct stat *buf)
+/**
+ * Get metadata attributes of file by FID.
+ *
+ * Use the IOC_MDC_GETFILEINFO ioctl (to send a MDS_GETATTR_NAME RPC)
+ * to get the attributes of the file identified by \a fid. This
+ * returns only the attributes stored on the MDT and avoids taking
+ * layout locks or accessing OST objects. It also bypasses the inode
+ * cache. Attributes are returned in \a stx.
+ *
+ * \param[in]  ct   copytool handle; its open_by_fid_fd is handed to
+ *                  get_lmd_info_fd()
+ * \param[in]  fid  FID of the file to query
+ * \param[out] stx  receives lmd_stx on success, left untouched on failure
+ *
+ * \retval 0 on success
+ * \retval -EINVAL if the formatted FID is empty or does not fit the buffer
+ * \retval -ENOMEM if the reply buffer cannot be allocated
+ * \retval otherwise the negative snprintf() result or the non-zero
+ *         result of get_lmd_info_fd()
+ */
+static int ct_md_getattr(const struct hsm_copytool_private *ct,
+                        const struct lu_fid *fid,
+                        lstatx_t *stx)
 {
-       char fid_name[FID_NOBRACE_LEN + 1];
+       struct lov_user_mds_data *lmd;
+       char fname[FID_NOBRACE_LEN + 1] = "";
+       size_t lmd_size;
+       int rc;
 
-       snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid));
+       rc = snprintf(fname, sizeof(fname), DFID_NOBRACE, PFID(fid));
+       if (rc < 0)
+               return rc;
+       if (rc >= sizeof(fname) || rc == 0)
+               return -EINVAL;
+
+       lmd_size = offsetof(typeof(*lmd), lmd_lmm) +
+               lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
+
+       if (lmd_size < offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX)
+               lmd_size = offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX;
+
+       lmd = malloc(lmd_size);
+       if (lmd == NULL)
+               return -ENOMEM;
 
-       return fstatat(ct->open_by_fid_fd, fid_name, buf, 0);
+       rc = get_lmd_info_fd(fname, ct->open_by_fid_fd, -1,
+                            lmd, lmd_size, GET_LMD_INFO);
+       if (rc)
+               goto out;
+
+       *stx = lmd->lmd_stx;
+out:
+       free(lmd);
+
+       return rc;
 }
 
 /** Create the destination volatile file for a restore operation.
@@ -948,26 +1037,26 @@ static int ct_stat_by_fid(const struct hsm_copytool_private *ct,
 static int create_restore_volatile(struct hsm_copyaction_private *hcp,
                                   int mdt_index, int open_flags)
 {
-       int                      rc;
-       int                      fd;
-       char                     parent[PATH_MAX + 1];
-       const char              *mnt = hcp->ct_priv->mnt;
-       struct hsm_action_item  *hai = &hcp->copy.hc_hai;
+       const struct hsm_copytool_private *ct = hcp->ct_priv;
+       struct hsm_action_item *hai = &hcp->copy.hc_hai;
+       char parent[PATH_MAX + 1];
+       int fd;
+       int rc;
 
-       rc = fid_parent(mnt, &hai->hai_fid, parent, sizeof(parent));
+       rc = fid_parent(ct, &hai->hai_fid, parent, sizeof(parent));
        if (rc < 0) {
                /* fid_parent() failed, try to keep on going */
                llapi_error(LLAPI_MSG_ERROR, rc,
                            "cannot get parent path to restore "DFID" "
-                           "using '%s'", PFID(&hai->hai_fid), mnt);
-               snprintf(parent, sizeof(parent), "%s", mnt);
+                           "using '%s'", PFID(&hai->hai_fid), ct->mnt);
+               snprintf(parent, sizeof(parent), "%s", ct->mnt);
        }
 
        fd = llapi_create_volatile_idx(parent, mdt_index, open_flags);
        if (fd < 0)
                return fd;
 
-       rc = fchown(fd, hcp->stat.st_uid, hcp->stat.st_gid);
+       rc = fchown(fd, hcp->statx.stx_uid, hcp->statx.stx_gid);
        if (rc < 0)
                goto err_cleanup;
 
@@ -1010,13 +1099,15 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
                           int restore_mdt_index, int restore_open_flags,
                           bool is_error)
 {
-       struct hsm_copyaction_private   *hcp;
-       int                              rc;
+       struct hsm_copyaction_private *hcp;
+       int fd;
+       int rc;
 
        hcp = calloc(1, sizeof(*hcp));
        if (hcp == NULL)
                return -ENOMEM;
 
+       hcp->source_fd = -1;
        hcp->data_fd = -1;
        hcp->ct_priv = ct;
        hcp->copy.hc_hai = *hai;
@@ -1025,8 +1116,17 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
        if (is_error)
                goto ok_out;
 
-       if (hai->hai_action == HSMA_RESTORE) {
-               rc = ct_stat_by_fid(hcp->ct_priv, &hai->hai_fid, &hcp->stat);
+       if (hai->hai_action == HSMA_ARCHIVE) {
+               fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid,
+                               O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
+               if (fd < 0) {
+                       rc = fd;
+                       goto err_out;
+               }
+
+               hcp->source_fd = fd;
+       } else if (hai->hai_action == HSMA_RESTORE) {
+               rc = ct_md_getattr(hcp->ct_priv, &hai->hai_fid, &hcp->statx);
                if (rc < 0)
                        goto err_out;
 
@@ -1034,6 +1134,40 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
                                             restore_open_flags);
                if (rc < 0)
                        goto err_out;
+       } else if (hai->hai_action == HSMA_REMOVE) {
+               /* Since remove is atomic there is no need to send an
+                * initial MDS_HSM_PROGRESS RPC.
+                * RW-PCC uses Lustre HSM mechanism for data synchronization.
+                * At the beginning of RW-PCC attach, the client tries to
+                * exclusively open the file by using a lease lock. A
+                * successful lease open ensures that the current attach
+                * process is the unique opener for the file.
+                * After taking the lease, the file data is then copied from
+                * OSTs into PCC and then the client closes the lease
+                * with a PCC attach intent.
+                * However, for a file in the HSM "exists, archived" state
+                * (i.e. a cached file that was just detached from PCC and
+                * restored to the OSTs), an HSM REMOVE request may wrongly
+                * delete the above PCC copy during RW-PCC attach.
+                * Thus, an open/close on the corresponding Lustre file is
+                * added for HSMA_REMOVE here to solve this conflict.
+                */
+               fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_fid,
+                               O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
+               if (fd < 0) {
+                       rc = fd;
+                       /* ignore the error in case of Remove Archive on Last
+                        * Unlink (RAoLU).
+                        */
+                       if (rc == -ENOENT) {
+                               rc = 0;
+                               goto out_log;
+                       }
+                       goto err_out;
+               }
+
+               hcp->source_fd = fd;
+               goto out_log;
        }
 
        rc = ioctl(ct->mnt_fd, LL_IOC_HSM_COPY_START, &hcp->copy);
@@ -1042,6 +1176,7 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
                goto err_out;
        }
 
+out_log:
        llapi_hsm_log_ct_progress(&hcp, hai, CT_START, 0, 0);
 
 ok_out:
@@ -1050,6 +1185,9 @@ ok_out:
        return 0;
 
 err_out:
+       if (!(hcp->source_fd < 0))
+               close(hcp->source_fd);
+
        if (!(hcp->data_fd < 0))
                close(hcp->data_fd);
 
@@ -1086,19 +1224,23 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
        hai = &hcp->copy.hc_hai;
 
        if (hai->hai_action == HSMA_RESTORE && errval == 0) {
-               struct timeval tv[2];
-
-               /* Set {a,m}time of volatile file to that of original. */
-               tv[0].tv_sec = hcp->stat.st_atime;
-               tv[0].tv_usec = 0;
-               tv[1].tv_sec = hcp->stat.st_mtime;
-               tv[1].tv_usec = 0;
-               if (futimes(hcp->data_fd, tv) < 0) {
+               struct ll_futimes_3 lfu = {
+                       .lfu_atime_sec = hcp->statx.stx_atime.tv_sec,
+                       .lfu_atime_nsec = hcp->statx.stx_atime.tv_nsec,
+                       .lfu_mtime_sec = hcp->statx.stx_mtime.tv_sec,
+                       .lfu_mtime_nsec = hcp->statx.stx_mtime.tv_nsec,
+                       .lfu_ctime_sec = hcp->statx.stx_ctime.tv_sec,
+                       .lfu_ctime_nsec = hcp->statx.stx_ctime.tv_nsec,
+               };
+
+               rc = fsync(hcp->data_fd);
+               if (rc < 0) {
                        errval = -errno;
                        goto end;
                }
 
-               rc = fsync(hcp->data_fd);
+               /* Set {a,m,c}time of volatile file to that of original. */
+               rc = ioctl(hcp->data_fd, LL_IOC_FUTIMES_3, &lfu);
                if (rc < 0) {
                        errval = -errno;
                        goto end;
@@ -1127,6 +1269,9 @@ end:
        llapi_hsm_log_ct_progress(&hcp, hai, CT_FINISH, 0, 0);
 
 err_cleanup:
+       if (!(hcp->source_fd < 0))
+               close(hcp->source_fd);
+
        if (!(hcp->data_fd < 0))
                close(hcp->data_fd);
 
@@ -1181,7 +1326,7 @@ int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
  * @return error code if the action is not a copy operation.
  */
 int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
-                             lustre_fid *fid)
+                             struct lu_fid *fid)
 {
        const struct hsm_action_item    *hai = &hcp->copy.hc_hai;
 
@@ -1206,17 +1351,20 @@ int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
 int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp)
 {
        const struct hsm_action_item    *hai = &hcp->copy.hc_hai;
+       int fd;
 
        if (hcp->magic != CP_PRIV_MAGIC)
                return -EINVAL;
 
-       if (hai->hai_action == HSMA_ARCHIVE)
-               return ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid,
-                               O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
-       else if (hai->hai_action == HSMA_RESTORE)
-               return dup(hcp->data_fd);
-       else
+       if (hai->hai_action == HSMA_ARCHIVE) {
+               fd = dup(hcp->source_fd); /* fd opened in llapi_hsm_action_begin() */
+               return fd < 0 ? -errno : fd; /* dup() failure becomes -errno */
+       } else if (hai->hai_action == HSMA_RESTORE) {
+               fd = dup(hcp->data_fd);
+               return fd < 0 ? -errno : fd; /* dup() failure becomes -errno */
+       } else {
                return -EINVAL;
+       }
 }
 
 /**
@@ -1235,7 +1383,7 @@ int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp)
 int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
                     unsigned long long stripe_size, int stripe_offset,
                     int stripe_count, int stripe_pattern, char *pool_name,
-                    lustre_fid *newfid)
+                    struct lu_fid *newfid)
 {
        struct hsm_user_import   hui;
        int                      fd;
@@ -1250,9 +1398,9 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
                                  stripe_pattern | LOV_PATTERN_F_RELEASED,
                                  pool_name);
        if (fd < 0) {
-               llapi_error(LLAPI_MSG_ERROR, -errno,
+               llapi_error(LLAPI_MSG_ERROR, fd,
                            "cannot create '%s' for import", dst);
-               return -errno;
+               return fd;
        }
 
        /* Get the new fid in Lustre. Caller needs to use this fid
@@ -1275,8 +1423,8 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
        hui.hui_mtime_ns = st->st_mtim.tv_nsec;
        rc = ioctl(fd, LL_IOC_HSM_IMPORT, &hui);
        if (rc != 0) {
-               llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst);
                rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst);
                goto out_unlink;
        }