Whamcloud - gitweb
LU-5095 hsm: Allow receiving messages to be non-blocking
[fs/lustre-release.git] / lustre / utils / liblustreapi_hsm.c
index 9a36c6b..e47a7d9 100644 (file)
 #include <sys/syscall.h>
 #include <fnmatch.h>
 #include <glob.h>
+#include <signal.h>
 #ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
 #else
 #include <unistd.h>
 #endif
 
-#include <liblustre.h>
+#include <libcfs/libcfs.h>
 #include <lnet/lnetctl.h>
-#include <obd.h>
-#include <obd_lov.h>
+#include <lustre/lustre_idl.h>
 #include <lustre/lustreapi.h>
 #include "lustreapi_internal.h"
 
+#define OPEN_BY_FID_PATH dot_lustre_name"/fid"
+
 /****** HSM Copytool API ********/
 #define CT_PRIV_MAGIC 0xC0BE2001
 struct hsm_copytool_private {
        int                      magic;
        char                    *mnt;
+       struct kuc_hdr          *kuch;
        int                      mnt_fd;
+       int                      open_by_fid_fd;
        lustre_kernelcomm        kuc;
        __u32                    archives;
 };
@@ -75,20 +79,575 @@ struct hsm_copyaction_private {
        __s32                                    data_fd;
        const struct hsm_copytool_private       *ct_priv;
        struct hsm_copy                          copy;
+       struct stat                              stat;
 };
 
 #include <libcfs/libcfs.h>
 
+enum ct_progress_type {
+       CT_START        = 0,
+       CT_RUNNING      = 50,
+       CT_FINISH       = 100,
+       CT_CANCEL       = 150,
+       CT_ERROR        = 175
+};
+
+enum ct_event {
+       CT_REGISTER             = 1,
+       CT_UNREGISTER           = 2,
+       CT_ARCHIVE_START        = HSMA_ARCHIVE,
+       CT_ARCHIVE_RUNNING      = HSMA_ARCHIVE + CT_RUNNING,
+       CT_ARCHIVE_FINISH       = HSMA_ARCHIVE + CT_FINISH,
+       CT_ARCHIVE_CANCEL       = HSMA_ARCHIVE + CT_CANCEL,
+       CT_ARCHIVE_ERROR        = HSMA_ARCHIVE + CT_ERROR,
+       CT_RESTORE_START        = HSMA_RESTORE,
+       CT_RESTORE_RUNNING      = HSMA_RESTORE + CT_RUNNING,
+       CT_RESTORE_FINISH       = HSMA_RESTORE + CT_FINISH,
+       CT_RESTORE_CANCEL       = HSMA_RESTORE + CT_CANCEL,
+       CT_RESTORE_ERROR        = HSMA_RESTORE + CT_ERROR,
+       CT_REMOVE_START         = HSMA_REMOVE,
+       CT_REMOVE_RUNNING       = HSMA_REMOVE + CT_RUNNING,
+       CT_REMOVE_FINISH        = HSMA_REMOVE + CT_FINISH,
+       CT_REMOVE_CANCEL        = HSMA_REMOVE + CT_CANCEL,
+       CT_REMOVE_ERROR         = HSMA_REMOVE + CT_ERROR,
+       CT_EVENT_MAX
+};
+
+/* initialized in llapi_hsm_register_event_fifo() */
+int llapi_hsm_event_fd = -1;
+
+static inline const char *llapi_hsm_ct_ev2str(int type)
+{
+       switch (type) {
+       case CT_REGISTER:
+               return "REGISTER";
+       case CT_UNREGISTER:
+               return "UNREGISTER";
+       case CT_ARCHIVE_START:
+               return "ARCHIVE_START";
+       case CT_ARCHIVE_RUNNING:
+               return "ARCHIVE_RUNNING";
+       case CT_ARCHIVE_FINISH:
+               return "ARCHIVE_FINISH";
+       case CT_ARCHIVE_CANCEL:
+               return "ARCHIVE_CANCEL";
+       case CT_ARCHIVE_ERROR:
+               return "ARCHIVE_ERROR";
+       case CT_RESTORE_START:
+               return "RESTORE_START";
+       case CT_RESTORE_RUNNING:
+               return "RESTORE_RUNNING";
+       case CT_RESTORE_FINISH:
+               return "RESTORE_FINISH";
+       case CT_RESTORE_CANCEL:
+               return "RESTORE_CANCEL";
+       case CT_RESTORE_ERROR:
+               return "RESTORE_ERROR";
+       case CT_REMOVE_START:
+               return "REMOVE_START";
+       case CT_REMOVE_RUNNING:
+               return "REMOVE_RUNNING";
+       case CT_REMOVE_FINISH:
+               return "REMOVE_FINISH";
+       case CT_REMOVE_CANCEL:
+               return "REMOVE_CANCEL";
+       case CT_REMOVE_ERROR:
+               return "REMOVE_ERROR";
+       default:
+               llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                 "Unknown event type: %d", type);
+               return NULL;
+       }
+}
+
+/**
+ * Writes a JSON event to the monitor FIFO. Noop if no FIFO has been
+ * registered.
+ *
+ * \param event              A list of llapi_json_items comprising a
+ *                           single JSON-formatted event.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
+{
+       int                             rc;
+       char                            time_string[40];
+       char                            json_buf[PIPE_BUF];
+       FILE                            *buf_file;
+       time_t                          event_time = time(0);
+       struct tm                       time_components;
+       struct llapi_json_item_list     *json_items;
+
+       /* Noop unless the event fd was initialized */
+       if (llapi_hsm_event_fd < 0)
+               return 0;
+
+       if (event == NULL || *event == NULL)
+               return -EINVAL;
+
+       json_items = *event;
+
+       localtime_r(&event_time, &time_components);
+
+       if (strftime(time_string, sizeof(time_string), "%Y-%m-%d %T %z",
+                    &time_components) == 0) {
+               rc = -EINVAL;
+               llapi_error(LLAPI_MSG_ERROR, rc, "strftime() failed");
+               return rc;
+       }
+
+       rc = llapi_json_add_item(&json_items, "event_time", LLAPI_JSON_STRING,
+                                time_string);
+       if (rc < 0) {
+               llapi_error(LLAPI_MSG_ERROR, -rc, "error in "
+                           "llapi_json_add_item()");
+               return rc;
+       }
+
+       buf_file = fmemopen(json_buf, sizeof(json_buf), "w");
+       if (buf_file == NULL)
+               return -errno;
+
+       rc = llapi_json_write_list(event, buf_file);
+       if (rc < 0) {
+               fclose(buf_file);
+               return rc;
+       }
+
+       fclose(buf_file);
+
+       if (write(llapi_hsm_event_fd, json_buf, strlen(json_buf)) < 0) {
+               /* Ignore write failures due to missing reader. */
+               if (errno != EPIPE)
+                       return -errno;
+       }
+
+       return 0;
+}
+
+/**
+ * Hook for llapi_hsm_copytool_register and llapi_hsm_copytool_unregister
+ * to generate JSON events suitable for consumption by a copytool
+ * monitoring process.
+ *
+ * \param priv               Opaque private control structure.
+ * \param event_type         The type of event (register or unregister).
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
+                                 __u32 event_type)
+{
+       int                             rc;
+       char                            agent_uuid[UUID_MAX];
+       struct hsm_copytool_private     *ct;
+       struct llapi_json_item_list     *json_items;
+
+       if (priv == NULL || *priv == NULL)
+               return -EINVAL;
+
+       ct = *priv;
+       if (ct->magic != CT_PRIV_MAGIC)
+               return -EINVAL;
+
+       if (event_type != CT_REGISTER && event_type != CT_UNREGISTER)
+               return -EINVAL;
+
+       rc = llapi_json_init_list(&json_items);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_get_agent_uuid(ct->mnt, agent_uuid, sizeof(agent_uuid));
+       if (rc < 0)
+               goto err;
+       llapi_chomp_string(agent_uuid);
+
+       rc = llapi_json_add_item(&json_items, "uuid", LLAPI_JSON_STRING,
+                                agent_uuid);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "mount_point", LLAPI_JSON_STRING,
+                                ct->mnt);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "archive", LLAPI_JSON_INTEGER,
+                                &ct->archives);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
+                                (char *)llapi_hsm_ct_ev2str(event_type));
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_hsm_write_json_event(&json_items);
+       if (rc < 0)
+               goto err;
+
+       goto out_free;
+
+err:
+       llapi_error(LLAPI_MSG_ERROR, rc, "error in "
+                   "llapi_hsm_log_ct_registration()");
+
+out_free:
+       if (json_items != NULL)
+               llapi_json_destroy_list(&json_items);
+
+       return rc;
+}
+
+/**
+ * Given a copytool progress update, construct a JSON event suitable for
+ * consumption by a copytool monitoring process.
+ *
+ * Examples of various events generated here and written by
+ * llapi_hsm_write_json_event:
+ *
+ * Copytool registration and deregistration:
+ * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "REGISTER",
+ *  "archive": 0, "mount_point": "/mnt/lustre",
+ *  "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"}
+ * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "UNREGISTER",
+ *  "archive": 0, "mount_point": "/mnt/lustre",
+ *  "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"}
+ *
+ * An archive action, start to completion:
+ * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "ARCHIVE_START",
+ *  "total_bytes": 0, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm",
+ *  "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
+ * {"event_time": "2014-02-26 14:50:18 -0500", "event_type": "ARCHIVE_RUNNING",
+ *  "current_bytes": 5242880, "total_bytes": 39000000,
+ *  "lustre_path": "d71.sanity-hsm/f71.sanity-hsm",
+ *  "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
+ * {"event_time": "2014-02-26 14:50:50 -0500", "event_type": "ARCHIVE_FINISH",
+ *  "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
+ *
+ * A log message:
+ * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "LOGGED_MESSAGE",
+ *  "level": "INFO",
+ *  "message": "lhsmtool_posix[42]: copytool fs=lustre archive#=2 item_count=1"}
+ *
+ * \param hcp                Opaque action handle returned by
+ *                           llapi_hsm_action_start.
+ * \param hai                The hsm_action_item describing the request.
+ * \param progress_type      The ct_progress_type describing the update.
+ * \param total              The total expected bytes for the request.
+ * \param current            The current copied byte count for the request.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
+                   const struct hsm_action_item *hai, __u32 progress_type,
+                   __u64 total, __u64 current)
+{
+       int                             rc;
+       int                             linkno = 0;
+       long long                       recno = -1;
+       char                            lustre_path[PATH_MAX];
+       char                            strfid[FID_NOBRACE_LEN + 1];
+       struct hsm_copyaction_private   *hcp;
+       struct llapi_json_item_list     *json_items;
+
+       if (phcp == NULL || *phcp == NULL)
+               return -EINVAL;
+
+       hcp = *phcp;
+
+       rc = llapi_json_init_list(&json_items);
+       if (rc < 0)
+               goto err;
+
+       snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_dfid));
+       rc = llapi_json_add_item(&json_items, "data_fid",
+                                LLAPI_JSON_STRING, strfid);
+       if (rc < 0)
+               goto err;
+
+       snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_fid));
+       rc = llapi_json_add_item(&json_items, "source_fid",
+                                LLAPI_JSON_STRING, strfid);
+       if (rc < 0)
+               goto err;
+
+       if (hcp->copy.hc_errval == ECANCELED) {
+               progress_type = CT_CANCEL;
+               goto cancel;
+       }
+
+       if (hcp->copy.hc_errval != 0) {
+               progress_type = CT_ERROR;
+
+               rc = llapi_json_add_item(&json_items, "errno",
+                                        LLAPI_JSON_INTEGER,
+                                        &hcp->copy.hc_errval);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "error",
+                                        LLAPI_JSON_STRING,
+                                        strerror(hcp->copy.hc_errval));
+               if (rc < 0)
+                       goto err;
+
+               goto cancel;
+       }
+
+       /* lustre_path isn't available after a restore completes */
+       /* total_bytes isn't available after a restore or archive completes */
+       if (progress_type != CT_FINISH) {
+               rc = llapi_fid2path(hcp->ct_priv->mnt, strfid, lustre_path,
+                                   sizeof(lustre_path), &recno, &linkno);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "lustre_path",
+                                        LLAPI_JSON_STRING, lustre_path);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "total_bytes",
+                                        LLAPI_JSON_BIGNUM, &total);
+               if (rc < 0)
+                       goto err;
+       }
+
+       if (progress_type == CT_RUNNING)
+               rc = llapi_json_add_item(&json_items, "current_bytes",
+                                        LLAPI_JSON_BIGNUM, &current);
+               if (rc < 0)
+                       goto err;
+
+cancel:
+       rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
+                                (char *)llapi_hsm_ct_ev2str(hai->hai_action +
+                                                            progress_type));
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_hsm_write_json_event(&json_items);
+       if (rc < 0)
+               goto err;
+
+       goto out_free;
+
+err:
+       llapi_error(LLAPI_MSG_ERROR, rc, "error in "
+                   "llapi_hsm_log_ct_progress()");
+
+out_free:
+       if (json_items != NULL)
+               llapi_json_destroy_list(&json_items);
+
+       return rc;
+}
+
+/**
+ * Given a path to a FIFO, create a filehandle for nonblocking writes to it.
+ * Intended to be used for copytool monitoring processes that read an
+ * event stream from the FIFO. Events written in the absence of a reader
+ * are lost.
+ *
+ * \param path               Path to monitor FIFO.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_register_event_fifo(char *path)
+{
+       int read_fd;
+       struct stat statbuf;
+
+       /* Create the FIFO if necessary. */
+       if ((mkfifo(path, 0644) < 0) && (errno != EEXIST)) {
+               llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed", path);
+               return -errno;
+       }
+       if (errno == EEXIST) {
+               if (stat(path, &statbuf) < 0) {
+                       llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed",
+                                   path);
+                       return -errno;
+               }
+               if (!S_ISFIFO(statbuf.st_mode) ||
+                   ((statbuf.st_mode & 0777) != 0644)) {
+                       llapi_error(LLAPI_MSG_ERROR, errno, "%s exists but is "
+                                   "not a pipe or has a wrong mode", path);
+                       return -errno;
+               }
+       }
+
+       /* Open the FIFO for read so that the subsequent open for write
+        * doesn't immediately fail. */
+       read_fd = open(path, O_RDONLY | O_NONBLOCK);
+       if (read_fd < 0) {
+               llapi_error(LLAPI_MSG_ERROR, errno,
+                           "cannot open(%s) for read", path);
+               return -errno;
+       }
+
+       /* Open the FIFO for writes, but don't block on waiting
+        * for a reader. */
+       llapi_hsm_event_fd = open(path, O_WRONLY | O_NONBLOCK);
+       if (llapi_hsm_event_fd < 0) {
+               llapi_error(LLAPI_MSG_ERROR, errno,
+                           "cannot open(%s) for write", path);
+               return -errno;
+       }
+
+       /* Now close the reader. An external monitoring process can
+        * now open the FIFO for reads. If no reader comes along the
+        * events are lost. NOTE: Only one reader at a time! */
+       close(read_fd);
+
+       /* Ignore SIGPIPEs -- can occur if the reader goes away. */
+       signal(SIGPIPE, SIG_IGN);
+
+       return 0;
+}
+
+/**
+ * Given a path to a FIFO, close its filehandle and delete the FIFO.
+ *
+ * \param path               Path to monitor FIFO.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_unregister_event_fifo(char *path)
+{
+       /* Noop unless the event fd was initialized */
+       if (llapi_hsm_event_fd < 0)
+               return 0;
+
+       if (close(llapi_hsm_event_fd) < 0)
+               return -errno;
+
+       unlink(path);
+
+       llapi_hsm_event_fd = -1;
+
+       return 0;
+}
+
+/**
+ * Custom logging callback to be used when a monitoring FIFO has been
+ * registered. Formats log entries as JSON events suitable for
+ * consumption by a copytool monitoring process.
+ *
+ * \param level              The message loglevel.
+ * \param _rc                The returncode associated with the message.
+ * \param fmt                The message format string.
+ * \param args               Arguments to be formatted by the format string.
+ *
+ * \retval None.
+ */
+void llapi_hsm_log_error(enum llapi_message_level level, int _rc,
+                        const char *fmt, va_list args)
+{
+       int                             rc;
+       int                             msg_len;
+       int                             real_level;
+       char                            *msg = NULL;
+       va_list                         args2;
+       struct llapi_json_item_list     *json_items;
+
+       /* Noop unless the event fd was initialized */
+       if (llapi_hsm_event_fd < 0)
+               return;
+
+       rc = llapi_json_init_list(&json_items);
+       if (rc < 0)
+               goto err;
+
+       if ((level & LLAPI_MSG_NO_ERRNO) == 0) {
+               rc = llapi_json_add_item(&json_items, "errno",
+                                        LLAPI_JSON_INTEGER,
+                                        &_rc);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "error",
+                                        LLAPI_JSON_STRING,
+                                        strerror(abs(_rc)));
+               if (rc < 0)
+                       goto err;
+       }
+
+       va_copy(args2, args);
+       msg_len = vsnprintf(NULL, 0, fmt, args2) + 1;
+       va_end(args2);
+       if (msg_len >= 0) {
+               msg = (char *) alloca(msg_len);
+               if (msg == NULL) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+
+               rc = vsnprintf(msg, msg_len, fmt, args);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "message",
+                                        LLAPI_JSON_STRING,
+                                        msg);
+               if (rc < 0)
+                       goto err;
+       } else {
+               rc = llapi_json_add_item(&json_items, "message",
+                                        LLAPI_JSON_STRING,
+                                        "INTERNAL ERROR: message failed");
+               if (rc < 0)
+                       goto err;
+       }
+
+       real_level = level & LLAPI_MSG_NO_ERRNO;
+       real_level = real_level > 0 ? level - LLAPI_MSG_NO_ERRNO : level;
+
+       rc = llapi_json_add_item(&json_items, "level", LLAPI_JSON_STRING,
+                                (void *)llapi_msg_level2str(real_level));
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
+                                "LOGGED_MESSAGE");
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_hsm_write_json_event(&json_items);
+       if (rc < 0)
+               goto err;
+
+       goto out_free;
+
+err:
+       /* Write directly to stderr to avoid llapi_error, which now
+        * emits JSON event messages. */
+       fprintf(stderr, "\nFATAL ERROR IN llapi_hsm_log_error(): rc %d,", rc);
+
+out_free:
+       if (json_items != NULL)
+               llapi_json_destroy_list(&json_items);
+
+       return;
+}
+
 /** Register a copytool
  * \param[out] priv Opaque private control structure
  * \param mnt Lustre filesystem mount point
- * \param flags Open flags, currently unused (e.g. O_NONBLOCK)
  * \param archive_count
  * \param archives Which archive numbers this copytool is responsible for
+ * \param rfd_flags flags applied to read fd of pipe (e.g. O_NONBLOCK)
  */
 int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
-                               const char *mnt, int flags, int archive_count,
-                               int *archives)
+                               const char *mnt, int archive_count,
+                               int *archives, int rfd_flags)
 {
        struct hsm_copytool_private     *ct;
        int                              rc;
@@ -103,11 +662,11 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
        if (ct == NULL)
                return -ENOMEM;
 
-       ct->mnt_fd = open(mnt, O_DIRECTORY | O_RDONLY | O_NONBLOCK);
-       if (ct->mnt_fd < 0) {
-               rc = -errno;
-               goto out_err;
-       }
+       ct->magic = CT_PRIV_MAGIC;
+       ct->mnt_fd = -1;
+       ct->open_by_fid_fd = -1;
+       ct->kuc.lk_rfd = LK_NOFD;
+       ct->kuc.lk_wfd = LK_NOFD;
 
        ct->mnt = strdup(mnt);
        if (ct->mnt == NULL) {
@@ -115,7 +674,23 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
                goto out_err;
        }
 
-       ct->magic = CT_PRIV_MAGIC;
+       ct->kuch = malloc(HAL_MAXSIZE + sizeof(*ct->kuch));
+       if (ct->kuch == NULL) {
+               rc = -ENOMEM;
+               goto out_err;
+       }
+
+       ct->mnt_fd = open(ct->mnt, O_RDONLY);
+       if (ct->mnt_fd < 0) {
+               rc = -errno;
+               goto out_err;
+       }
+
+       ct->open_by_fid_fd = openat(ct->mnt_fd, OPEN_BY_FID_PATH, O_RDONLY);
+       if (ct->open_by_fid_fd < 0) {
+               rc = -errno;
+               goto out_err;
+       }
 
        /* no archives specified means "match all". */
        ct->archives = 0;
@@ -137,7 +712,7 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
                ct->archives |= (1 << (archives[rc] - 1));
        }
 
-       rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM);
+       rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM, rfd_flags);
        if (rc < 0)
                goto out_err;
 
@@ -153,6 +728,8 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
                rc = 0;
        }
 
+       llapi_hsm_log_ct_registration(&ct, CT_REGISTER);
+
        /* Only the kernel reference keeps the write side open */
        close(ct->kuc.lk_wfd);
        ct->kuc.lk_wfd = LK_NOFD;
@@ -169,9 +746,16 @@ out_kuc:
 out_err:
        if (!(ct->mnt_fd < 0))
                close(ct->mnt_fd);
-       if (ct->mnt != NULL)
-               free(ct->mnt);
+
+       if (!(ct->open_by_fid_fd < 0))
+               close(ct->open_by_fid_fd);
+
+       free(ct->mnt);
+
+       free(ct->kuch);
+
        free(ct);
+
        return rc;
 }
 
@@ -198,20 +782,39 @@ int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv)
        /* Shut down the kernelcomms */
        libcfs_ukuc_stop(&ct->kuc);
 
+       llapi_hsm_log_ct_registration(&ct, CT_UNREGISTER);
+
+       close(ct->open_by_fid_fd);
        close(ct->mnt_fd);
        free(ct->mnt);
+       free(ct->kuch);
        free(ct);
        *priv = NULL;
 
        return 0;
 }
 
+/** Returns a file descriptor to poll/select on.
+ * \param ct Opaque private control structure
+ * \retval -EINVAL on error
+ * \retval the file descriptor for reading HSM events from the kernel
+ */
+int llapi_hsm_copytool_get_fd(struct hsm_copytool_private *ct)
+{
+       if (ct == NULL || ct->magic != CT_PRIV_MAGIC)
+               return -EINVAL;
+
+       return libcfs_ukuc_get_rfd(&ct->kuc);
+}
+
 /** Wait for the next hsm_action_list
  * \param ct Opaque private control structure
  * \param halh Action list handle, will be allocated here
  * \param msgsize Number of bytes in the message, will be set here
  * \return 0 valid message received; halh and msgsize are set
  *        <0 error code
+ * Note: The application must not call llapi_hsm_copytool_recv until it has
+ * cleared the data in ct->kuch from the previous call.
  */
 int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
                            struct hsm_action_list **halh, int *msgsize)
@@ -226,21 +829,20 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
        if (halh == NULL || msgsize == NULL)
                return -EINVAL;
 
-       kuch = malloc(HAL_MAXSIZE + sizeof(*kuch));
-       if (kuch == NULL)
-               return -ENOMEM;
+       kuch = ct->kuch;
 
+repeat:
        rc = libcfs_ukuc_msg_get(&ct->kuc, (char *)kuch,
                                 HAL_MAXSIZE + sizeof(*kuch),
                                 KUC_TRANSPORT_HSM);
        if (rc < 0)
-               goto out_free;
+               goto out_err;
 
        /* Handle generic messages */
        if (kuch->kuc_transport == KUC_TRANSPORT_GENERIC &&
            kuch->kuc_msgtype == KUC_MSG_SHUTDOWN) {
                rc = -ESHUTDOWN;
-               goto out_free;
+               goto out_err;
        }
 
        if (kuch->kuc_transport != KUC_TRANSPORT_HSM ||
@@ -249,14 +851,14 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
                                  "Unknown HSM message type %d:%d\n",
                                  kuch->kuc_transport, kuch->kuc_msgtype);
                rc = -EPROTO;
-               goto out_free;
+               goto out_err;
        }
 
        if (kuch->kuc_msglen < sizeof(*kuch) + sizeof(*hal)) {
                llapi_err_noerrno(LLAPI_MSG_ERROR, "Short HSM message %d",
                                  kuch->kuc_msglen);
                rc = -EPROTO;
-               goto out_free;
+               goto out_err;
        }
 
        /* Our message is a hsm_action_list. Use pointer math to skip
@@ -273,29 +875,20 @@ int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
                                  " ignoring this request."
                                  " Mask of served archive is 0x%.8X",
                                  hal->hal_archive_id, ct->archives);
-               rc = -EAGAIN;
 
-               goto out_free;
+               goto repeat;
        }
 
        *halh = hal;
        *msgsize = kuch->kuc_msglen - sizeof(*kuch);
        return 0;
 
-out_free:
+out_err:
        *halh = NULL;
        *msgsize = 0;
-       free(kuch);
        return rc;
 }
 
-/** Release the action list when done with it. */
-void llapi_hsm_action_list_free(struct hsm_action_list **hal)
-{
-       /* Reuse the llapi_changelog_free function */
-       llapi_changelog_free((struct changelog_ext_rec **)hal);
-}
-
 /** Get parent path from mount point and fid.
  *
  * \param mnt        Filesystem root path.
@@ -338,6 +931,31 @@ static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent,
        return rc;
 }
 
+static int ct_open_by_fid(const struct hsm_copytool_private *ct,
+                         const struct lu_fid *fid, int open_flags)
+{
+       char fid_name[FID_NOBRACE_LEN + 1];
+       int fd;
+
+       snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid));
+
+       fd = openat(ct->open_by_fid_fd, fid_name, open_flags);
+       return fd < 0 ? -errno : fd;
+}
+
+static int ct_stat_by_fid(const struct hsm_copytool_private *ct,
+                         const struct lu_fid *fid,
+                         struct stat *buf)
+{
+       char fid_name[FID_NOBRACE_LEN + 1];
+       int rc;
+
+       snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid));
+
+       rc = fstatat(ct->open_by_fid_fd, fid_name, buf, 0);
+       return rc ? -errno : 0;
+}
+
 /** Create the destination volatile file for a restore operation.
  *
  * \param hcp        Private copyaction handle.
@@ -346,7 +964,7 @@ static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent,
  * \return 0 on success.
  */
 static int create_restore_volatile(struct hsm_copyaction_private *hcp,
-                                  int mdt_index, int flags)
+                                  int mdt_index, int open_flags)
 {
        int                      rc;
        int                      fd;
@@ -358,15 +976,19 @@ static int create_restore_volatile(struct hsm_copyaction_private *hcp,
        if (rc < 0) {
                /* fid_parent() failed, try to keep on going */
                llapi_error(LLAPI_MSG_ERROR, rc,
-                           "cannot get parent path to restore "DFID
+                           "cannot get parent path to restore "DFID" "
                            "using '%s'", PFID(&hai->hai_fid), mnt);
                snprintf(parent, sizeof(parent), "%s", mnt);
        }
 
-       fd = llapi_create_volatile_idx(parent, mdt_index, flags);
+       fd = llapi_create_volatile_idx(parent, mdt_index, open_flags);
        if (fd < 0)
                return fd;
 
+       rc = fchown(fd, hcp->stat.st_uid, hcp->stat.st_gid);
+       if (rc < 0)
+               goto err_cleanup;
+
        rc = llapi_fd2fid(fd, &hai->hai_dfid);
        if (rc < 0)
                goto err_cleanup;
@@ -422,6 +1044,10 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
                goto ok_out;
 
        if (hai->hai_action == HSMA_RESTORE) {
+               rc = ct_stat_by_fid(hcp->ct_priv, &hai->hai_fid, &hcp->stat);
+               if (rc < 0)
+                       goto err_out;
+
                rc = create_restore_volatile(hcp, restore_mdt_index,
                                             restore_open_flags);
                if (rc < 0)
@@ -434,6 +1060,8 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
                goto err_out;
        }
 
+       llapi_hsm_log_ct_progress(&hcp, hai, CT_START, 0, 0);
+
 ok_out:
        hcp->magic = CP_PRIV_MAGIC;
        *phcp = hcp;
@@ -459,7 +1087,7 @@ err_out:
  * \return 0 on success.
  */
 int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
-                        const struct hsm_extent *he, int flags, int errval)
+                        const struct hsm_extent *he, int hp_flags, int errval)
 {
        struct hsm_copyaction_private   *hcp;
        struct hsm_action_item          *hai;
@@ -475,6 +1103,27 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
 
        hai = &hcp->copy.hc_hai;
 
+       if (hai->hai_action == HSMA_RESTORE && errval == 0) {
+               struct timeval tv[2];
+
+               /* Set {a,m}time of volatile file to that of original. */
+               tv[0].tv_sec = hcp->stat.st_atime;
+               tv[0].tv_usec = 0;
+               tv[1].tv_sec = hcp->stat.st_mtime;
+               tv[1].tv_usec = 0;
+               if (futimes(hcp->data_fd, tv) < 0) {
+                       errval = -errno;
+                       goto end;
+               }
+
+               rc = fsync(hcp->data_fd);
+               if (rc < 0) {
+                       errval = -errno;
+                       goto end;
+               }
+       }
+
+end:
        /* In some cases, like restore, 2 FIDs are used.
         * Set the right FID to use here. */
        if (hai->hai_action == HSMA_ARCHIVE || hai->hai_action == HSMA_RESTORE)
@@ -482,7 +1131,7 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
 
        /* Fill the last missing data that will be needed by
         * kernel to send a hsm_progress. */
-       hcp->copy.hc_flags  = flags;
+       hcp->copy.hc_flags  = hp_flags;
        hcp->copy.hc_errval = abs(errval);
 
        hcp->copy.hc_hai.hai_extent = *he;
@@ -493,6 +1142,8 @@ int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
                goto err_cleanup;
        }
 
+       llapi_hsm_log_ct_progress(&hcp, hai, CT_FINISH, 0, 0);
+
 err_cleanup:
        if (!(hcp->data_fd < 0))
                close(hcp->data_fd);
@@ -506,11 +1157,13 @@ err_cleanup:
 /** Notify a progress in processing an HSM action.
  * \param hdl[in,out]   handle returned by llapi_hsm_action_start.
  * \param he[in]        the range of copied data (for copy actions).
+ * \param total[in]     the expected total of copied data (for copy actions).
  * \param hp_flags[in]  HSM progress flags.
  * \return 0 on success.
  */
 int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
-                             const struct hsm_extent *he, int hp_flags)
+                             const struct hsm_extent *he, __u64 total,
+                             int hp_flags)
 {
        int                      rc;
        struct hsm_progress      hp;
@@ -537,6 +1190,8 @@ int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
        if (rc < 0)
                rc = -errno;
 
+       llapi_hsm_log_ct_progress(&hcp, hai, CT_RUNNING, total, he->length);
+
        return rc;
 }
 
@@ -569,14 +1224,20 @@ int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
 int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp)
 {
        const struct hsm_action_item    *hai = &hcp->copy.hc_hai;
+       int fd;
 
        if (hcp->magic != CP_PRIV_MAGIC)
                return -EINVAL;
 
-       if (hai->hai_action != HSMA_RESTORE)
+       if (hai->hai_action == HSMA_ARCHIVE) {
+               return ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid,
+                               O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
+       } else if (hai->hai_action == HSMA_RESTORE) {
+               fd = dup(hcp->data_fd);
+               return fd < 0 ? -errno : fd;
+       } else {
                return -EINVAL;
-
-       return dup(hcp->data_fd);
+       }
 }
 
 /**
@@ -610,9 +1271,9 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
                                  stripe_pattern | LOV_PATTERN_F_RELEASED,
                                  pool_name);
        if (fd < 0) {
-               llapi_error(LLAPI_MSG_ERROR, -errno,
+               llapi_error(LLAPI_MSG_ERROR, fd,
                            "cannot create '%s' for import", dst);
-               return -errno;
+               return fd;
        }
 
        /* Get the new fid in Lustre. Caller needs to use this fid
@@ -635,8 +1296,8 @@ int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
        hui.hui_mtime_ns = st->st_mtim.tv_nsec;
        rc = ioctl(fd, LL_IOC_HSM_IMPORT, &hui);
        if (rc != 0) {
-               llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst);
                rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst);
                goto out_unlink;
        }