Whamcloud - gitweb
LU-2062 utils: HSM Posix CopyTool 37/4737/29
authorjcl <jc.lafoucriere@cea.fr>
Thu, 2 Dec 2010 20:57:59 +0000 (21:57 +0100)
committerOleg Drokin <oleg.drokin@intel.com>
Sat, 3 Aug 2013 21:45:12 +0000 (21:45 +0000)
POSIX HSM CopyTool utils named lhsmtool_posix.
This user space command is the 'glue" between Lustre-HSM
and a POSIX filesytem used as a backend.
The main functionalities implemented are:
daemon mode:
- archive: read in lustre write with POSIX backend
- restore: read in POSIX backend write to Lustre
- remove: remove an entry from backend
cmd line mode:
- import: create in lustre a released file from a backend file
- rebind: change the FID associated to a file in the backend
- maxseq: get the larger sequence of FID found in the backend

The 2 last options are used for disaster recovery mode

This tools is also used for all the non regression tests made in
sanity-hsm.sh

Signed-off-by: JC Lafoucriere <jacques-charles.lafoucriere@cea.fr>
Signed-off-by: Henri Doreau <henri.doreau@cea.fr>
Change-Id: I2d6cf5c9bd1f714ad407929f4603f68c5b8f5ec3
Reviewed-on: http://review.whamcloud.com/4737
Tested-by: Hudson
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
12 files changed:
lustre/include/lustre/lustre_user.h
lustre/include/lustre/lustreapi.h
lustre/tests/Makefile.am
lustre/tests/copytool.c [deleted file]
lustre/tests/sanity-hsm.sh
lustre/tests/sanity.sh
lustre/utils/.gitignore
lustre/utils/Makefile.am
lustre/utils/lfs.c
lustre/utils/lhsmtool_posix.c [new file with mode: 0644]
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_hsm.c

index 36fbce9..c856390 100644 (file)
@@ -322,6 +322,9 @@ struct ost_id {
 #define LOV_ALL_STRIPES       0xffff /* only valid for directories */
 #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
 
+#define XATTR_LUSTRE_PREFIX    "lustre."
+#define XATTR_LUSTRE_LOV       XATTR_LUSTRE_PREFIX"lov"
+
 #define lov_user_ost_data lov_user_ost_data_v1
 struct lov_user_ost_data_v1 {     /* per-stripe data structure */
        struct ost_id l_ost_oi;   /* OST object ID */
@@ -1169,12 +1172,6 @@ struct hsm_progress {
        __u32                   padding;
 };
 
-/**
- * Use by copytool during any hsm request they handled.
- * This structure is initialized by llapi_hsm_copy_start()
- * which is an helper over the ioctl() interface
- * Store Lustre, internal use only, data.
- */
 struct hsm_copy {
        __u64                   hc_data_version;
        __u16                   hc_flags;
index 880189d..ea88e07 100644 (file)
@@ -219,8 +219,8 @@ extern int llapi_lmv_get_uuids(int fd, struct obd_uuid *uuidp, int *mdt_count);
 extern int llapi_is_lustre_mnttype(const char *type);
 extern int llapi_search_ost(char *fsname, char *poolname, char *ostname);
 extern int llapi_get_obd_count(char *mnt, int *count, int is_mdt);
-extern int parse_size(char *optarg, unsigned long long *size,
-                      unsigned long long *size_units, int bytes_spec);
+extern int llapi_parse_size(const char *optarg, unsigned long long *size,
+                           unsigned long long *size_units, int bytes_spec);
 extern int llapi_search_mounts(const char *pathname, int index,
                                char *mntdir, char *fsname);
 extern int llapi_search_fsname(const char *pathname, char *fsname);
@@ -298,18 +298,27 @@ extern int llapi_changelog_clear(const char *mdtname, const char *idstr,
  * priv is private state, managed internally by these functions
  */
 struct hsm_copytool_private;
-extern int llapi_hsm_copytool_start(struct hsm_copytool_private **priv,
-                                   char *fsname, int flags,
-                                   int archive_count, int *archives);
-extern int llapi_hsm_copytool_fini(struct hsm_copytool_private **priv);
+struct hsm_copyaction_private;
+
+extern int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
+                                      const char *mnt, int flags,
+                                      int archive_count, int *archives);
+extern int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv);
 extern int llapi_hsm_copytool_recv(struct hsm_copytool_private *priv,
                                   struct hsm_action_list **hal, int *msgsize);
-extern int llapi_hsm_copytool_free(struct hsm_action_list **hal);
-extern int llapi_hsm_copy_start(char *mnt, struct hsm_copy *copy,
-                               const struct hsm_action_item *hai);
-extern int llapi_hsm_copy_end(char *mnt, struct hsm_copy *copy,
-                             const struct hsm_progress *hp);
-extern int llapi_hsm_progress(char *mnt, struct hsm_progress *hp);
+extern void llapi_hsm_action_list_free(struct hsm_action_list **hal);
+extern int llapi_hsm_action_begin(struct hsm_copyaction_private **hcp,
+                                 const struct hsm_copytool_private *ct_priv,
+                                 const struct hsm_action_item *hai,
+                                 bool is_error);
+extern int llapi_hsm_action_end(struct hsm_copyaction_private **hcp,
+                               const struct hsm_extent *he, int flags,
+                               int errval);
+extern int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
+                                    const struct hsm_extent *he, int hp_flags);
+extern int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
+                                    lustre_fid *fid);
+extern int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp);
 extern int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
                            unsigned long long stripe_size, int stripe_offset,
                            int stripe_count, int stripe_pattern,
@@ -318,7 +327,8 @@ extern int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
 /* HSM user interface */
 extern struct hsm_user_request *llapi_hsm_user_request_alloc(int itemcount,
                                                             int data_len);
-extern int llapi_hsm_request(char *mnt, struct hsm_user_request *request);
+extern int llapi_hsm_request(const char *path,
+                            const struct hsm_user_request *request);
 extern int llapi_hsm_current_action(const char *path,
                                    struct hsm_current_action *hca);
 /** @} llapi */
index b04a7f0..38ad9c0 100644 (file)
@@ -67,7 +67,7 @@ noinst_PROGRAMS += small_write multiop ll_sparseness_verify
 noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany
 noinst_PROGRAMS += openfilleddirunlink rename_many memhog
 noinst_PROGRAMS += mmap_sanity writemany reads flocks_test
-noinst_PROGRAMS += write_time_limit rwv copytool lgetxattr_size_check checkfiemap
+noinst_PROGRAMS += write_time_limit rwv lgetxattr_size_check checkfiemap
 
 bin_PROGRAMS = mcreate munlink
 testdir = $(libdir)/lustre/tests
@@ -81,7 +81,6 @@ mmap_sanity_SOURCES= mmap_sanity.c
 
 LIBLUSTREAPI = $(top_builddir)/lustre/utils/liblustreapi.a
 multiop_LDADD=$(LIBLUSTREAPI) -lrt $(PTHREAD_LIBS) $(LIBCFS)
-copytool_LDADD=$(LIBLUSTREAPI) $(PTHREAD_LIBS) $(LIBCFS)
 it_test_LDADD=$(LIBCFS)
 rwv_LDADD=$(LIBCFS)
 
diff --git a/lustre/tests/copytool.c b/lustre/tests/copytool.c
deleted file mode 100644 (file)
index b877e5b..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- */
-
-/* HSM copytool example program.
- * The copytool acts on action requests from Lustre to copy files to and from
- * an HSM archive system.
- *
- * Note: under Linux, until llapi_hsm_copytool_fini is called (or the program is
- * killed), the libcfs module will be referenced and unremovable,
- * even after Lustre services stop.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <signal.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <libcfs/libcfs.h>
-#include <lustre/lustreapi.h>
-
-struct hsm_copytool_private *ctdata;
-
-void handler(int signal ) {
-        psignal(signal, "exiting");
-        /* If we don't clean up upon interrupt, umount thinks there's a ref
-         * and doesn't remove us from mtab (EINPROGRESS). The lustre client
-         * does successfully unmount and the mount is actually gone, but the
-         * mtab entry remains. So this just makes mtab happier. */
-       llapi_hsm_copytool_fini(&ctdata);
-        exit(1);
-}
-
-int main(int argc, char **argv) {
-        int c, test = 0;
-        struct option long_opts[] = {
-                {"test", no_argument, 0, 't'},
-                {0, 0, 0, 0}
-        };
-        int archives[] = {1}; /* which archives we care about */
-        int rc;
-
-        optind = 0;
-        while ((c = getopt_long(argc, argv, "t", long_opts, NULL)) != -1) {
-                switch (c) {
-                case 't':
-                        test++;
-                        break;
-                default:
-                        fprintf(stderr, "error: %s: option '%s' unrecognized\n",
-                                argv[0], argv[optind - 1]);
-                        return EINVAL;
-                }
-        }
-
-        if (optind != argc - 1) {
-                fprintf(stderr, "Usage: %s <fsname>\n", argv[0]);
-                return -EINVAL;
-        }
-
-       rc = llapi_hsm_copytool_start(&ctdata, argv[optind], 0,
-                                  ARRAY_SIZE(archives), archives);
-        if (rc < 0) {
-                fprintf(stderr, "Can't start copytool interface: %s\n",
-                        strerror(-rc));
-                return -rc;
-        }
-
-        if (test)
-               return -llapi_hsm_copytool_fini(&ctdata);
-
-        printf("Waiting for message from kernel (pid=%d)\n", getpid());
-
-        signal(SIGINT, handler);
-
-        while(1) {
-               struct hsm_action_list *hal;
-               struct hsm_action_item *hai;
-               int msgsize, i = 0;
-
-               rc = llapi_hsm_copytool_recv(ctdata, &hal, &msgsize);
-               if (rc == -ESHUTDOWN) {
-                       fprintf(stderr, "shutting down");
-                       break;
-               }
-               if (rc < 0) {
-                       fprintf(stderr, "Message receive: %s", strerror(-rc));
-                       break;
-               }
-               if (msgsize == 0)
-                       continue; /* msg not for us */
-
-               printf("Copytool fs=%s archive#=%d item_count=%d\n",
-                       hal->hal_fsname, hal->hal_archive_id, hal->hal_count);
-
-               hai = hai_zero(hal);
-               while (++i <= hal->hal_count) {
-                       printf("Item %d: action %d reclen %d\n", i,
-                               hai->hai_action, hai->hai_len);
-                       printf(" "DFID" gid="LPU64" cookie="LPU64"\n",
-                               PFID(&hai->hai_fid), hai->hai_gid,
-                               hai->hai_cookie);
-                       hai = hai_next(hai);
-               }
-
-               llapi_hsm_copytool_free(&hal);
-        }
-
-       llapi_hsm_copytool_fini(&ctdata);
-
-        return -rc;
-}
-
-
-
index fa2845c..ec75d2c 100644 (file)
@@ -10,7 +10,7 @@ set -e
 set +o monitor
 
 SRCDIR=`dirname $0`
-export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin
+export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin/
 
 ONLY=${ONLY:-"$*"}
 SANITY_HSM_EXCEPT=${SANITY_HSM_EXCEPT:-""}
@@ -27,6 +27,7 @@ ORIG_PWD=${PWD}
 MCREATE=${MCREATE:-mcreate}
 
 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
@@ -63,8 +64,8 @@ copytool_cleanup() {
 }
 
 copytool_setup() {
-       # TODO: add copytool setup code here!
-       return
+       rm -rf $HSM_ARCHIVE
+       mkdir -p $HSM_ARCHIVE
 }
 
 fail() {
@@ -72,6 +73,11 @@ fail() {
        error $*
 }
 
+export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
+export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
+HSM_ARCHIVE=${HSM_ARCHIVE:-$TMP/arc}
+HSM_ARCHIVE_NUMBER=2
+
 path2fid() {
        $LFS path2fid $1 | tr -d '[]'
 }
@@ -240,6 +246,32 @@ test_3() {
 }
 run_test 3 "Check file dirtyness when opening for write"
 
+test_11() {
+       mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
+       cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile
+       local f=$DIR/$tdir/$tfile
+       $HSMTOOL $HSMTOOL_VERBOSE --archive $HSM_ARCHIVE_NUMBER \
+               --hsm_root $HSM_ARCHIVE --import $tdir/$tfile $f $MOUNT ||
+               error "import failed"
+       echo -n "Verifying released state: "
+       $LFS hsm_state $f
+       $LFS hsm_state $f | grep -q "released exists archived" ||
+               error "flags not set"
+       local LSZ=$(stat -c "%s" $f)
+       local ASZ=$(stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile)
+       echo "Verifying imported size $LSZ=$ASZ"
+       [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ"
+       echo -n "Verifying released pattern: "
+       local PTRN=$($GETSTRIPE -L $f)
+       echo $PTRN
+       [[ $PTRN == 80000001 ]] || error "Is not released"
+       local fid=$(path2fid $f)
+       echo "Verifying new fid $fid in archive"
+       local AFILE=$(ls $HSM_ARCHIVE/*/*/*/*/*/*/$fid) || \
+               error "fid $fid not in archive $HSM_ARCHIVE"
+}
+run_test 11 "Import a file"
+
 test_20() {
        mkdir -p $DIR/$tdir
 
index 3a141d9..4ef4b53 100644 (file)
@@ -9498,20 +9498,6 @@ test_162() {
 }
 run_test 162 "path lookup sanity"
 
-test_163() {
-       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
-       remote_mds_nodsh && skip "remote MDS with nodsh" && return
-       copytool --test $FSNAME || { skip "copytool not runnable: $?" && return; }
-       copytool $FSNAME &
-       sleep 1
-       local uuid=$($LCTL get_param -n mdc.${FSNAME}-MDT0000-mdc-*.uuid)
-       # this proc file is temporary and linux-only
-       do_facet $SINGLEMDS lctl set_param mdt.${FSNAME}-MDT0000.mdccomm=$uuid ||\
-         error "kernel->userspace send failed"
-       kill -INT $!
-}
-run_test 163 "kernel <-> userspace comms"
-
 test_169() {
        # do directio so as not to populate the page cache
        log "creating a 10 Mb file"
index d0e901a..1e5b331 100644 (file)
@@ -27,3 +27,5 @@
 /ltrack_stats
 /lustre_rsync
 /ll_decode_filter_fid
+/lhsmd_posix
+/lhsmtool_posix
index 4d28e62..8bf84c4 100644 (file)
@@ -28,7 +28,7 @@ if SERVER
 sbin_PROGRAMS += mkfs.lustre tunefs.lustre
 endif
 if LIBPTHREAD
-sbin_PROGRAMS += loadgen
+sbin_PROGRAMS += loadgen lhsmtool_posix
 endif
 bin_PROGRAMS = lfs req_layout
 bin_SCRIPTS = $(bin_scripts)
@@ -173,6 +173,10 @@ l_getidentity_DEPENDENCIES := $(LIBPTLCTL)
 
 ltrack_stats_SOURCES = ltrack_stats.c
 
+lhsmtool_posix_SOURCES = lhsmtool_posix.c
+lhsmtool_posix_LDADD := liblustreapi.a $(PTHREAD_LIBS)
+lhsmtool_posix_DEPENDENCIES := liblustreapi.a
+
 EXTRA_DIST = $(sbin_scripts) $(bin_scripts)
 
 # NOTE: this should only be run on i386.
index 9e85a66..6bcf972 100644 (file)
@@ -723,15 +723,16 @@ static int lfs_setstripe(int argc, char **argv)
                 return CMD_HELP;
         }
 
-        /* get the stripe size */
-        if (stripe_size_arg != NULL) {
-                result = parse_size(stripe_size_arg, &st_size, &size_units, 0);
-                if (result) {
-                        fprintf(stderr, "error: %s: bad stripe size '%s'\n",
-                                argv[0], stripe_size_arg);
-                        return result;
-                }
-        }
+       /* get the stripe size */
+       if (stripe_size_arg != NULL) {
+               result = llapi_parse_size(stripe_size_arg, &st_size,
+                                         &size_units, 0);
+               if (result) {
+                       fprintf(stderr, "error: %s: bad stripe size '%s'\n",
+                               argv[0], stripe_size_arg);
+                       return result;
+               }
+       }
         /* get the stripe offset */
         if (stripe_off_arg != NULL) {
                 st_offset = strtol(stripe_off_arg, &end, 0);
@@ -1139,44 +1140,44 @@ err_free:
                         break;
                 case 'P':
                         break;
-                case 's':
-                        if (optarg[0] == '+') {
-                                param.size_sign = -1;
-                                optarg++;
-                        } else if (optarg[0] == '-') {
-                                param.size_sign =  1;
-                                optarg++;
-                        }
+               case 's':
+                       if (optarg[0] == '+') {
+                               param.size_sign = -1;
+                               optarg++;
+                       } else if (optarg[0] == '-') {
+                               param.size_sign =  1;
+                               optarg++;
+                       }
 
-                        ret = parse_size(optarg, &param.size,
-                                         &param.size_units, 0);
-                        if (ret) {
-                                fprintf(stderr, "error: bad file size '%s'\n",
-                                        optarg);
-                                goto err;
-                        }
-                        param.check_size = 1;
-                        param.exclude_size = !!neg_opt;
-                        break;
-                case 'S':
-                        if (optarg[0] == '+') {
-                                param.stripesize_sign = -1;
-                                optarg++;
-                        } else if (optarg[0] == '-') {
-                                param.stripesize_sign =  1;
-                                optarg++;
-                        }
+                       ret = llapi_parse_size(optarg, &param.size,
+                                              &param.size_units, 0);
+                       if (ret) {
+                               fprintf(stderr, "error: bad file size '%s'\n",
+                                       optarg);
+                               goto err;
+                       }
+                       param.check_size = 1;
+                       param.exclude_size = !!neg_opt;
+                       break;
+               case 'S':
+                       if (optarg[0] == '+') {
+                               param.stripesize_sign = -1;
+                               optarg++;
+                       } else if (optarg[0] == '-') {
+                               param.stripesize_sign =  1;
+                               optarg++;
+                       }
 
-                        ret = parse_size(optarg, &param.stripesize,
-                                         &param.stripesize_units, 0);
-                        if (ret) {
-                                fprintf(stderr, "error: bad stripe_size '%s'\n",
-                                        optarg);
-                                goto err;
-                        }
-                        param.check_stripesize = 1;
-                        param.exclude_stripesize = !!neg_opt;
-                        break;
+                       ret = llapi_parse_size(optarg, &param.stripesize,
+                                              &param.stripesize_units, 0);
+                       if (ret) {
+                               fprintf(stderr, "error: bad stripe_size '%s'\n",
+                                       optarg);
+                               goto err;
+                       }
+                       param.check_stripesize = 1;
+                       param.exclude_stripesize = !!neg_opt;
+                       break;
                 case 't':
                         param.exclude_type = !!neg_opt;
                         switch(optarg[0]) {
@@ -2179,17 +2180,17 @@ error:
         return ULONG_MAX;
 }
 
-#define ARG2ULL(nr, str, def_units)                                     \
-do {                                                                    \
-        unsigned long long limit, units = def_units;                    \
-        int rc;                                                         \
-                                                                        \
-        rc = parse_size(str, &limit, &units, 1);                        \
-        if (rc < 0) {                                                   \
-                fprintf(stderr, "error: bad limit value %s\n", str);    \
-                return CMD_HELP;                                        \
-        }                                                               \
-        nr = limit;                                                     \
+#define ARG2ULL(nr, str, def_units)                                    \
+do {                                                                   \
+       unsigned long long limit, units = def_units;                    \
+       int rc;                                                         \
+                                                                       \
+       rc = llapi_parse_size(str, &limit, &units, 1);                  \
+       if (rc < 0) {                                                   \
+               fprintf(stderr, "error: bad limit value %s\n", str);    \
+               return CMD_HELP;                                        \
+       }                                                               \
+       nr = limit;                                                     \
 } while (0)
 
 static inline int has_times_option(int argc, char **argv)
diff --git a/lustre/utils/lhsmtool_posix.c b/lustre/utils/lhsmtool_posix.c
new file mode 100644 (file)
index 0000000..8b7c426
--- /dev/null
@@ -0,0 +1,1848 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.htm
+ *
+ * GPL HEADER END
+ */
+/*
+ * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
+ *     alternatives
+ *
+ */
+/* HSM copytool program for POSIX filesystem-based HSM's.
+ *
+ * An HSM copytool daemon acts on action requests from Lustre to copy files
+ * to and from an HSM archive system. This one in particular makes regular
+ * POSIX filesystem calls to a given path, where an HSM is presumably mounted.
+ *
+ * This particular tool can also import an existing HSM archive.
+ */
+
+#include <utime.h>
+#include <sys/xattr.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <sys/errno.h>
+#include <lustre/lustre_idl.h>
+#include <lustre/lustreapi.h>
+
+/* Progress reporting period */
+#define REPORT_INTERVAL_DEFAULT 30
+/* HSM hash subdir permissions */
+#define DIR_PERM S_IRWXU
+/* HSM hash file permissions */
+#define FILE_PERM (S_IRUSR | S_IWUSR)
+
+#define ONE_MB 0x100000
+
+/* copytool uses a 32b bitmask field to register with kuc
+ * archive num = 0 => all
+ * archive num from 1 to 32
+ */
+#define MAX_ARCHIVE_CNT (sizeof(__u32) * 8)
+
+enum ct_action {
+       CA_IMPORT = 1,
+       CA_REBIND,
+       CA_MAXSEQ,
+       CA_DAEMON,
+};
+
+struct options {
+       int                      o_copy_attrs;
+       int                      o_dry_run;
+       int                      o_abort_on_error;
+       int                      o_shadow_tree;
+       int                      o_verbose;
+       int                      o_copy_xattrs;
+       int                      o_archive_cnt;
+       int                      o_archive_id[MAX_ARCHIVE_CNT];
+       int                      o_report_int;
+       unsigned long long       o_bandwidth;
+       size_t                   o_chunk_size;
+       enum ct_action           o_action;
+       char                    *o_mnt;
+       char                    *o_hsm_root;
+       char                    *o_src; /* for import, or rebind */
+       char                    *o_dst; /* for import, or rebind */
+};
+
+/* everything else is zeroed */
+struct options opt = {
+       .o_copy_attrs = 1,
+       .o_shadow_tree = 1,
+       .o_verbose = LLAPI_MSG_WARN,
+       .o_copy_xattrs = 1,
+       .o_report_int = REPORT_INTERVAL_DEFAULT,
+       .o_chunk_size = ONE_MB,
+};
+
+/* The LLAPI will hold an open FD on lustre for us. Additionally open one on
+ * the archive FS root to make sure it doesn't drop out from under us (and
+ * remind the admin to shutdown the copytool before unmounting). */
+static int arc_fd = -1;
+
+static int err_major;
+static int err_minor;
+
+static char cmd_name[PATH_MAX];
+static char fs_name[MAX_OBD_NAME + 1];
+
+static struct hsm_copytool_private *ctdata;
+
+
+#define CT_ERROR(format, ...) \
+       llapi_printf(LLAPI_MSG_ERROR, "%s: "format, cmd_name, ## __VA_ARGS__)
+#define CT_DEBUG(format, ...) \
+       llapi_printf(LLAPI_MSG_DEBUG, "%s: "format, cmd_name, ## __VA_ARGS__)
+#define CT_WARN(format, ...) \
+       llapi_printf(LLAPI_MSG_WARN, "%s: "format, cmd_name, ## __VA_ARGS__)
+#define CT_TRACE(format, ...) \
+       llapi_printf(LLAPI_MSG_INFO, "%s: "format, cmd_name, ## __VA_ARGS__)
+
+static void usage(const char *name, int rc)
+{
+       fprintf(stdout,
+       " Usage: %s [options]... <mode> <lustre_mount_point>\n"
+       "The Lustre HSM Posix copy tool can be used as a daemon or "
+       "as a command line tool\n"
+       "The Lustre HSM daemon acts on action requests from Lustre\n"
+       "to copy files to and from an HSM archive system.\n"
+       "This POSIX-flavored daemon makes regular POSIX filesystem calls\n"
+       "to an HSM mounted at a given hsm_root.\n"
+       "   -d, --daemon        Daemon mode, run in background\n"
+       " Options:\n"
+       "   --no-attr           Don't copy file attributes\n"
+       "   --no-shadow         Don't create shadow namespace in archive\n"
+       "   --no-xattr          Don't copy file extended attributes\n"
+       "The Lustre HSM tool performs administrator-type actions\n"
+       "on a Lustre HSM archive.\n"
+       "This POSIX-flavored tool can link an existing HSM namespace\n"
+       "into a Lustre filesystem.\n"
+       " Usage:\n"
+       "   %s [options] --import <src> <dst> <lustre_mount_point>\n"
+       "      import an archived subtree at\n"
+       "       <src> (relative to hsm_root) into the Lustre filesystem at\n"
+       "       <dst> (absolute)\n"
+       "   %s [options] --rebind <old_FID> <new_FID> <lustre_mount_point>\n"
+       "      rebind an entry in the HSM to a new FID\n"
+       "       <old_FID> old FID the HSM entry is bound to\n"
+       "       <new_FID> new FID to bind the HSM entry to\n"
+       "   %s [options] --rebind <list_file> <lustre_mount_point>\n"
+       "      perform the rebind operation for all FID in the list file\n"
+       "       each line of <list_file> consists of <old_FID> <new_FID>\n"
+       "   %s [options] --max-sequence <fsname>\n"
+       "       return the max fid sequence of archived files\n"
+       "   -A, --archive <#>        Archive number (repeatable)\n"
+       "   -p, --hsm-root <path>    Target HSM mount point\n"
+       "   -q, --quiet              Produce less verbose output\n"
+       "   -v, --verbose            Produce more verbose output\n"
+       "   -c, --chunk-size <sz>    I/O size used during data copy\n"
+       "                            (unit can be used, default is MB)\n"
+       "   --abort-on-error         Abort operation on major error\n"
+       "   --dry-run                Don't run, just show what would be done\n"
+       "   --bandwidth <bw>         Limit I/O bandwidth (unit can be used\n,"
+       "                            default is MB)\n",
+       cmd_name, cmd_name, cmd_name, cmd_name, cmd_name);
+
+       exit(rc);
+}
+
+static int ct_parseopts(int argc, char * const *argv)
+{
+       struct option long_opts[] = {
+               {"abort-on-error", no_argument,       &opt.o_abort_on_error, 1},
+               {"abort_on_error", no_argument,       &opt.o_abort_on_error, 1},
+               {"archive",        required_argument, NULL,                'A'},
+               {"bandwidth",      required_argument, NULL,                'b'},
+               {"chunk-size",     required_argument, NULL,                'c'},
+               {"chunk_size",     required_argument, NULL,                'c'},
+               {"daemon",         no_argument,       NULL,                'd'},
+               {"dry-run",        no_argument,       &opt.o_dry_run,       1},
+               {"help",           no_argument,       NULL,                'h'},
+               {"hsm-root",       required_argument, NULL,                'p'},
+               {"hsm_root",       required_argument, NULL,                'p'},
+               {"import",         no_argument,       NULL,                'i'},
+               {"max-sequence",   no_argument,       NULL,                'M'},
+               {"max_sequence",   no_argument,       NULL,                'M'},
+               {"no-attr",        no_argument,       &opt.o_copy_attrs,    0},
+               {"no_attr",        no_argument,       &opt.o_copy_attrs,    0},
+               {"no-shadow",      no_argument,       &opt.o_shadow_tree,   0},
+               {"no_shadow",      no_argument,       &opt.o_shadow_tree,   0},
+               {"no-xattr",       no_argument,       &opt.o_copy_xattrs,   0},
+               {"no_xattr",       no_argument,       &opt.o_copy_xattrs,   0},
+               {"quiet",          no_argument,       NULL,                'q'},
+               {"rebind",         no_argument,       NULL,                'r'},
+               {"report",         required_argument, &opt.o_report_int,    0},
+               {"verbose",        no_argument,       NULL,                'v'},
+               {0, 0, 0, 0}
+       };
+       int                      c;
+       unsigned long long       value;
+       unsigned long long       unit;
+
+       optind = 0;
+       while ((c = getopt_long(argc, argv, "A:b:c:dhiMp:qruv",
+                               long_opts, NULL)) != -1) {
+               switch (c) {
+               case 'A':
+                       if ((opt.o_archive_cnt >= MAX_ARCHIVE_CNT) ||
+                           (atoi(optarg) >= MAX_ARCHIVE_CNT)) {
+                               CT_ERROR("archive number must be less"
+                                        "than %lu\n", MAX_ARCHIVE_CNT);
+                               return -E2BIG;
+                       }
+                       opt.o_archive_id[opt.o_archive_cnt] = atoi(optarg);
+                       opt.o_archive_cnt++;
+                       break;
+               case 'b': /* -b and -c have both a number with unit as arg */
+               case 'c':
+                       unit = ONE_MB;
+                       if (llapi_parse_size(optarg, &value, &unit, 0) < 0) {
+                               CT_ERROR("bad value for -%c '%s'\n", c, optarg);
+                               return -EINVAL;
+                       }
+                       if (c == 'c')
+                               opt.o_chunk_size = value;
+                       else
+                               opt.o_bandwidth = value;
+                       break;
+               case 'd':
+                       opt.o_action = CA_DAEMON;
+                       break;
+               case 'h':
+                       usage(argv[0], 0);
+               case 'i':
+                       opt.o_action = CA_IMPORT;
+                       break;
+               case 'M':
+                       opt.o_action = CA_MAXSEQ;
+                       break;
+               case 'p':
+                       opt.o_hsm_root = optarg;
+                       break;
+               case 'q':
+                       opt.o_verbose--;
+                       break;
+               case 'r':
+                       opt.o_action = CA_REBIND;
+                       break;
+               case 'v':
+                       opt.o_verbose++;
+                       break;
+               case 0:
+                       break;
+               default:
+                       CT_ERROR("unrecognized option '%s'\n",
+                                argv[optind - 1]);
+                       return -EINVAL;
+               }
+       }
+
+       switch (opt.o_action) {
+       case CA_IMPORT:
+               /* src dst mount_point */
+               if (argc != optind + 3) {
+                       CT_ERROR("--import requires 2 arguments\n");
+                       return -EINVAL;
+               }
+               opt.o_src = argv[optind++];
+               opt.o_dst = argv[optind++];
+               break;
+       case CA_REBIND:
+               /* FID1 FID2 mount_point or FILE mount_point */
+               if (argc == optind + 2) {
+                       opt.o_src = argv[optind++];
+                       opt.o_dst = NULL;
+               } else if (argc == optind + 3) {
+                       opt.o_src = argv[optind++];
+                       opt.o_dst = argv[optind++];
+               } else {
+                       CT_ERROR("--rebind requires 1 or 2 arguments\n");
+                       return -EINVAL;
+               }
+               break;
+       case CA_MAXSEQ:
+       default:
+               /* just mount point */
+               break;
+       }
+
+       if (argc != optind + 1) {
+               CT_ERROR("no mount point specified\n");
+               return -EINVAL;
+       }
+
+       opt.o_mnt = argv[optind];
+
+       CT_TRACE("action=%d src=%s dst=%s mount_point=%s\n",
+                opt.o_action, opt.o_src, opt.o_dst, opt.o_mnt);
+
+       if (!opt.o_dry_run && opt.o_hsm_root == NULL) {
+               CT_ERROR("must specify a HSM root\n");
+               return -EINVAL;
+       }
+
+       if (opt.o_action == CA_IMPORT) {
+               if (opt.o_src && opt.o_src[0] == '/') {
+                       CT_ERROR("source path must be relative to HSM root.\n");
+                       return -EINVAL;
+               }
+
+               if (opt.o_dst && opt.o_dst[0] != '/') {
+                       CT_ERROR("destination path must be absolute.\n");
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/* mkdir -p path */
+static int ct_mkdir_p(const char *path)
+{
+       char    *saved, *ptr;
+       int      rc;
+
+       ptr = strdup(path);
+       saved = ptr;
+       while (*ptr == '/')
+               ptr++;
+
+       while ((ptr = strchr(ptr, '/')) != NULL) {
+               *ptr = '\0';
+               rc = mkdir(saved, DIR_PERM);
+               *ptr = '/';
+               if (rc < 0 && errno != EEXIST) {
+                       CT_ERROR("'%s' mkdir failed (%s)\n", path,
+                                strerror(errno));
+                       free(saved);
+                       return -errno;
+               }
+               ptr++;
+       }
+
+       free(saved);
+
+       return 0;
+}
+
+static int ct_save_stripe(int src_fd, const char *src, const char *dst)
+{
+       char                     lov_file[PATH_MAX];
+       char                     lov_buf[XATTR_SIZE_MAX];
+       struct lov_user_md      *lum;
+       int                      rc;
+       int                      fd;
+       ssize_t                  xattr_size;
+
+       snprintf(lov_file, sizeof(lov_file), "%s.lov", dst);
+       CT_TRACE("saving stripe info of '%s' in %s\n", src, lov_file);
+
+       xattr_size = fgetxattr(src_fd, XATTR_LUSTRE_LOV, lov_buf,
+                              sizeof(lov_buf));
+       if (xattr_size < 0) {
+               CT_ERROR("'%s' cannot get stripe info on (%s)\n", src,
+                        strerror(errno));
+               return -errno;
+       }
+
+       lum = (struct lov_user_md *)lov_buf;
+
+       if (lum->lmm_magic == LOV_USER_MAGIC_V1 ||
+           lum->lmm_magic == LOV_USER_MAGIC_V3) {
+               /* Set stripe_offset to -1 so that it is not interpreted as a
+                * hint on restore. */
+               lum->lmm_stripe_offset = -1;
+       }
+
+       fd = open(lov_file, O_TRUNC | O_CREAT | O_WRONLY, FILE_PERM);
+       if (fd < 0) {
+               CT_ERROR("'%s' cannot open (%s)\n", lov_file, strerror(errno));
+               return -errno;
+       }
+
+       rc = write(fd, lum, xattr_size);
+       if (rc < 0) {
+               CT_ERROR("'%s' cannot write %d bytes (%s)\n",
+                        lov_file, xattr_size, strerror(errno));
+               close(fd);
+               return -errno;
+       }
+
+       rc = close(fd);
+       if (rc < 0) {
+               CT_ERROR("'%s' cannot close (%s)\n", lov_file, strerror(errno));
+               return -errno;
+       }
+
+       return 0;
+}
+
+static int ct_load_stripe(const char *src, struct lov_user_md_v3 *lum,
+                         size_t *lum_size)
+{
+       char     lov_file[PATH_MAX];
+       int      rc;
+       int      fd;
+
+       snprintf(lov_file, sizeof(lov_file), "%s.lov", src);
+       CT_TRACE("reading stripe rules from '%s' for '%s'\n", lov_file, src);
+
+       fd = open(lov_file, O_RDONLY);
+       if (fd < 0) {
+               CT_ERROR("'%s' cannot open (%s)\n", lov_file, strerror(errno));
+               return -ENODATA;
+       }
+
+       rc = read(fd, lum, *lum_size);
+       if (rc < 0) {
+               CT_ERROR("'%s' cannot read %lu bytes (%s)\n", lov_file,
+                        lum_size, strerror(errno));
+               close(fd);
+               return -ENODATA;
+       }
+
+       *lum_size = (size_t)rc;
+       close(fd);
+
+       return 0;
+}
+
+static int ct_restore_stripe(const char *src, const char *dst, int dst_fd)
+{
+       int                      rc;
+       char                     lov_buf[XATTR_SIZE_MAX];
+       size_t                   lum_size = sizeof(lov_buf);
+
+       rc = ct_load_stripe(src, (struct lov_user_md_v3 *)lov_buf, &lum_size);
+       if (rc) {
+               CT_WARN("'%s' cannot get stripe rules (%s), use default\n",
+                       src, strerror(-rc));
+               return 0;
+       }
+
+       rc = fsetxattr(dst_fd, XATTR_LUSTRE_LOV, lov_buf, lum_size, XATTR_CREATE);
+       if (rc < 0) {
+               CT_ERROR("'%s' cannot set striping (%s)\n",
+                        dst, strerror(errno));
+               return -errno;
+       }
+
+       return 0;
+}
+
+/* non-blocking read or write */
+static int nonblock_rw(bool wr, int fd, char *buf, int size)
+{
+       int rc;
+
+       if (wr)
+               rc = write(fd, buf, size);
+       else
+               rc = read(fd, buf, size);
+
+       if ((rc < 0) && (errno == -EAGAIN)) {
+               fd_set set;
+               struct timeval timeout;
+
+               timeout.tv_sec = opt.o_report_int;
+
+               FD_ZERO(&set);
+               FD_SET(fd, &set);
+               if (wr)
+                       rc = select(FD_SETSIZE, NULL, &set, NULL, &timeout);
+               else
+                       rc = select(FD_SETSIZE, &set, NULL, NULL, &timeout);
+               if (rc < 0)
+                       return -errno;
+               if (rc == 0)
+                       /* Timed out, we read nothing */
+                       return -EAGAIN;
+
+               /* Should be available now */
+               if (wr)
+                       rc = write(fd, buf, size);
+               else
+                       rc = read(fd, buf, size);
+       }
+
+       if (rc < 0)
+               rc = -errno;
+
+       return rc;
+}
+
+static int ct_copy_data(struct hsm_copyaction_private *hcp, const char *src,
+                       const char *dst, int src_fd, int dst_fd,
+                       const struct hsm_action_item *hai, long hal_flags)
+{
+       struct hsm_extent        he;
+       struct stat              src_st;
+       struct stat              dst_st;
+       char                    *buf;
+       __u64                    wpos = 0;
+       __u64                    rpos = 0;
+       __u64                    rlen;
+       time_t                   last_print_time = time(0);
+       int                      rsize;
+       int                      wsize;
+       int                      bufoff = 0;
+       int                      rc = 0;
+
+       CT_TRACE("going to copy data from '%s' to %s\n", src, dst);
+
+       buf = malloc(opt.o_chunk_size);
+       if (buf == NULL)
+               return -ENOMEM;
+
+       if (fstat(src_fd, &src_st) < 0) {
+               CT_ERROR("'%s' stat failed (%s)\n", src, strerror(errno));
+               return -errno;
+       }
+
+       if (!S_ISREG(src_st.st_mode)) {
+               CT_ERROR("'%s' not a regular file\n", src);
+               return -EINVAL;
+       }
+
+       rc = lseek(src_fd, hai->hai_extent.offset, SEEK_SET);
+       if (rc < 0) {
+               CT_ERROR("'%s' seek to read to "LPU64" (len %zu)"
+                        " failed (%s)\n",
+                        src, hai->hai_extent.offset, src_st.st_size,
+                        strerror(errno));
+               rc = -errno;
+               goto out;
+       }
+
+       if (fstat(dst_fd, &dst_st) < 0) {
+               CT_ERROR("'%s' stat failed (%s)\n", dst, strerror(errno));
+               return -errno;
+       }
+
+       if (!S_ISREG(dst_st.st_mode)) {
+               CT_ERROR("'%s' not a regular file\n", dst);
+               return -EINVAL;
+       }
+
+       rc = lseek(dst_fd, hai->hai_extent.offset, SEEK_SET);
+       if (rc < 0) {
+               CT_ERROR("'%s' seek to write to "LPU64" failed (%s)\n", src,
+                        hai->hai_extent.offset, strerror(errno));
+               rc = -errno;
+               goto out;
+       }
+
+       he.offset = hai->hai_extent.offset;
+       he.length = 0;
+       rc = llapi_hsm_action_progress(hcp, &he, 0);
+       if (rc) {
+               /* Action has been canceled or something wrong
+                * is happening. Stop copying data. */
+               CT_ERROR("%s->'%s' progress returned err %d\n", src, dst, rc);
+               goto out;
+       }
+
+       errno = 0;
+       /* Don't read beyond a given extent */
+       rlen = (hai->hai_extent.length == -1LL) ?
+               src_st.st_size : hai->hai_extent.length;
+
+       while (wpos < rlen) {
+               int chunk = (rlen - wpos > opt.o_chunk_size) ?
+                           opt.o_chunk_size : rlen - wpos;
+
+               /* Only read more if we wrote everything in the buffer */
+               if (wpos == rpos) {
+                       rsize = nonblock_rw(0, src_fd, buf, chunk);
+                       if (rsize == 0)
+                               /* EOF */
+                               break;
+
+                       if (rsize == -EAGAIN) {
+                               /* Timed out */
+                               rsize = 0;
+                               if (rpos == 0) {
+                                       /* Haven't read anything yet, let's
+                                        * give it back to the coordinator
+                                        * for rescheduling */
+                                       rc = -EAGAIN;
+                                       break;
+                               }
+                       }
+
+                       if (rsize < 0) {
+                               CT_ERROR("'%s' read failed (%s)\n", src,
+                                        strerror(-rsize));
+                               rc = rsize;
+                               break;
+                       }
+
+                       rpos += rsize;
+                       bufoff = 0;
+               }
+
+               wsize = nonblock_rw(1, dst_fd, buf + bufoff, rpos - wpos);
+               if (wsize == -EAGAIN)
+                       /* Timed out */
+                       wsize = 0;
+
+               if (wsize < 0) {
+                       CT_ERROR("'%s' write failed (%s)\n", dst,
+                                strerror(-wsize));
+                       rc = wsize;
+                       break;
+               }
+
+               wpos += wsize;
+               bufoff += wsize;
+
+               if (opt.o_bandwidth != 0) {
+                       static unsigned long long       tot_bytes;
+                       static time_t                   start_time, last_time;
+                       time_t                          now = time(0);
+                       double                          tot_time, excess;
+                       unsigned int                    sleep_time;
+
+                       if (now > last_time + 5) {
+                               tot_bytes = 0;
+                               start_time = last_time = now;
+                       }
+
+                       tot_bytes += wsize;
+                       tot_time = now - start_time;
+                       if (tot_time < 1)
+                               tot_time = 1;
+
+                       excess = tot_bytes - tot_time * opt.o_bandwidth;
+                       sleep_time = excess * 1000000 / opt.o_bandwidth;
+                       if ((now - start_time) % 10 == 1)
+                               CT_TRACE("bandwith control: excess=%E"
+                                        " sleep for %dus\n",
+                                        excess, sleep_time);
+
+                       if (excess > 0)
+                               usleep(sleep_time);
+
+                       last_time = now;
+               }
+
+               if (time(0) >= last_print_time + opt.o_report_int) {
+                       last_print_time = time(0);
+                       CT_TRACE("%%"LPU64" ", 100 * wpos / rlen);
+                       he.length = wpos;
+                       rc = llapi_hsm_action_progress(hcp, &he, 0);
+                       if (rc) {
+                               /* Action has been canceled or something wrong
+                                * is happening. Stop copying data. */
+                               CT_ERROR("%s->'%s' progress returned err %d\n",
+                                        src, dst, rc);
+                               goto out;
+                       }
+               }
+               rc = 0;
+       }
+       CT_TRACE("\n");
+
+out:
+       /*
+        * truncate restored file
+        * size is taken from the archive this is done to support
+        * restore after a force release which leaves the file with the
+        * wrong size (can big bigger than the new size)
+        */
+       if ((hai->hai_action == HSMA_RESTORE) &&
+           (src_st.st_size < dst_st.st_size)) {
+               /*
+                * make sure the file is on disk before reporting success.
+                */
+               rc = ftruncate(dst_fd, src_st.st_size);
+               if (rc < 0) {
+                       rc = -errno;
+                       CT_ERROR("'%s' final truncate to %lu failed (%s)\n",
+                                dst, src_st.st_size, strerror(-rc));
+                       err_major++;
+               }
+       }
+
+       if (rc == 0) {
+               rc = fsync(dst_fd);
+               if (rc < 0) {
+                       rc = -errno;
+                       CT_ERROR("'%s' fsync failed (%s)\n", dst,
+                                strerror(-rc));
+                       err_major++;
+               }
+       }
+
+       free(buf);
+
+       return rc;
+}
+
+/* Copy file attributes from file src to file dest */
+static int ct_copy_attr(const char *src, const char *dst, int src_fd,
+                       int dst_fd)
+{
+       struct stat     st;
+       struct timeval  times[2];
+
+       if (fstat(src_fd, &st) < 0) {
+               CT_ERROR("'%s' stat failed (%s)\n",
+                    src, strerror(errno));
+               return -errno;
+       }
+
+       times[0].tv_sec = st.st_atime;
+       times[0].tv_usec = 0;
+       times[1].tv_sec = st.st_mtime;
+       times[1].tv_usec = 0;
+       if (fchmod(dst_fd, st.st_mode) < 0 ||
+           fchown(dst_fd, st.st_uid, st.st_gid) < 0 ||
+           futimes(dst_fd, times) < 0)
+               CT_ERROR("'%s' fchmod fchown or futimes failed (%s)\n", src,
+                        strerror(errno));
+               return -errno;
+       return 0;
+}
+
+static int ct_copy_xattr(const char *src, const char *dst, int src_fd,
+                        int dst_fd, bool is_restore)
+{
+       char     list[XATTR_LIST_MAX];
+       char     value[XATTR_SIZE_MAX];
+       char    *name;
+       ssize_t  list_len;
+       int      rc;
+
+       list_len = flistxattr(src_fd, list, sizeof(list));
+       if (list_len < 0)
+               return -errno;
+
+       name = list;
+       while (name < list + list_len) {
+               rc = fgetxattr(src_fd, name, value, sizeof(value));
+               if (rc < 0)
+                       return -errno;
+
+               /* when we restore, we do not restore lustre xattr */
+               if (!is_restore ||
+                   (strncmp(XATTR_TRUSTED_PREFIX, name,
+                            sizeof(XATTR_TRUSTED_PREFIX) - 1) != 0)) {
+                       rc = fsetxattr(dst_fd, name, value, rc, 0);
+                       CT_TRACE("'%s' fsetxattr of '%s' rc=%d (%s)\n",
+                                dst, name, rc, strerror(errno));
+                       /* lustre.* attrs aren't supported on other FS's */
+                       if (rc < 0 && errno != EOPNOTSUPP) {
+                               CT_ERROR("'%s' fsetxattr of '%s' failed (%s)\n",
+                                        dst, name, strerror(errno));
+                               return -errno;
+                       }
+               }
+               name += strlen(name) + 1;
+       }
+
+       return 0;
+}
+
+static int ct_path_lustre(char *buf, int sz, const char *mnt,
+                         const lustre_fid *fid)
+{
+       return snprintf(buf, sz, "%s/%s/fid/"DFID_NOBRACE, mnt,
+                       dot_lustre_name, PFID(fid));
+}
+
+static int ct_path_archive(char *buf, int sz, const char *archive_dir,
+                          const lustre_fid *fid)
+{
+       return snprintf(buf, sz, "%s/%04x/%04x/%04x/%04x/%04x/%04x/"
+                       DFID_NOBRACE, archive_dir,
+                       (fid)->f_oid       & 0xFFFF,
+                       (fid)->f_oid >> 16 & 0xFFFF,
+                       (unsigned int)((fid)->f_seq       & 0xFFFF),
+                       (unsigned int)((fid)->f_seq >> 16 & 0xFFFF),
+                       (unsigned int)((fid)->f_seq >> 32 & 0xFFFF),
+                       (unsigned int)((fid)->f_seq >> 48 & 0xFFFF),
+                       PFID(fid));
+}
+
+static bool ct_is_retryable(int err)
+{
+       return err == -ETIMEDOUT;
+}
+
+static int ct_begin(struct hsm_copyaction_private **phcp,
+                   const struct hsm_action_item *hai)
+{
+       char     src[PATH_MAX];
+       int      rc;
+
+       rc = llapi_hsm_action_begin(phcp, ctdata, hai, false);
+       if (rc < 0) {
+               ct_path_lustre(src, sizeof(src), opt.o_mnt, &hai->hai_fid);
+               CT_ERROR("'%s' copy start failed (%s)\n", src, strerror(-rc));
+       }
+
+       return rc;
+}
+
+static int ct_fini(struct hsm_copyaction_private **phcp,
+                  const struct hsm_action_item *hai, int flags, int ct_rc)
+{
+       char    lstr[PATH_MAX];
+       int     rc;
+
+       CT_TRACE("Action completed, notifying coordinator "
+                "cookie="LPX64", FID="DFID", flags=%d err=%d\n",
+                hai->hai_cookie, PFID(&hai->hai_fid),
+                flags, -ct_rc);
+
+       ct_path_lustre(lstr, sizeof(lstr), opt.o_mnt, &hai->hai_fid);
+       rc = llapi_hsm_action_end(phcp, &hai->hai_extent, flags, abs(ct_rc));
+       if (rc == -ECANCELED)
+               CT_ERROR("'%s' completed action has been canceled: "
+                        "cookie="LPX64", FID="DFID"\n", lstr, hai->hai_cookie,
+                        PFID(&hai->hai_fid));
+       else if (rc < 0)
+               CT_ERROR("'%s' copy end failed (%s)\n", lstr, strerror(-rc));
+       else
+               CT_TRACE("'%s' copy end ok (rc=%d)\n", lstr, rc);
+
+       return rc;
+}
+
+static int ct_archive(const struct hsm_action_item *hai, const long hal_flags)
+{
+       struct hsm_copyaction_private   *hcp = NULL;
+       char                             src[PATH_MAX];
+       char                             dst[PATH_MAX];
+       int                              rc;
+       int                              rcf = 0;
+       bool                             rename_needed = false;
+       int                              ct_flags = 0;
+       int                              open_flags;
+       int                              src_fd = -1;
+       int                              dst_fd = -1;
+
+       rc = ct_begin(&hcp, hai);
+       if (rc < 0)
+               goto fini_major;
+
+       /* we fill archive so:
+        * source = data FID
+        * destination = lustre FID
+        */
+       ct_path_lustre(src, sizeof(src), opt.o_mnt, &hai->hai_dfid);
+       ct_path_archive(dst, sizeof(dst), opt.o_hsm_root, &hai->hai_fid);
+       if (hai->hai_extent.length == -1) {
+               /* whole file, write it to tmp location and atomically
+                * replace old archived file */
+               strncat(dst, "_tmp", sizeof(dst) - strlen(dst) - 1);
+               /* we cannot rely on the same test because ct_copy_data()
+                * updates hai_extent.length */
+               rename_needed = true;
+       }
+
+       CT_TRACE("'%s' archived to %s\n", src, dst);
+
+       if (opt.o_dry_run) {
+               rc = 0;
+               goto fini_major;
+       }
+
+       rc = ct_mkdir_p(dst);
+       if (rc < 0) {
+               CT_ERROR("'%s' mkdir_p failed (%s)\n", dst, strerror(-rc));
+               goto fini_major;
+       }
+
+       src_fd = open(src, O_RDONLY | O_NOATIME | O_NONBLOCK | O_NOFOLLOW);
+       if (src_fd == -1) {
+               CT_ERROR("'%s' open read failed (%s)\n", src, strerror(errno));
+               rc = -errno;
+               goto fini_major;
+       }
+
+       open_flags = O_WRONLY | O_NOFOLLOW | O_NONBLOCK;
+       /* If extent is specified, don't truncate an old archived copy */
+       open_flags |= ((hai->hai_extent.length == -1) ? O_TRUNC : 0) | O_CREAT;
+
+       dst_fd = open(dst, open_flags, FILE_PERM);
+       if (dst_fd == -1) {
+               CT_ERROR("'%s' open write failed (%s)\n", dst, strerror(errno));
+               rc = -errno;
+               goto fini_major;
+       }
+
+       /* saving stripe is not critical */
+       rc = ct_save_stripe(src_fd, src, dst);
+       if (rc < 0)
+               CT_ERROR("'%s' cannot save file striping info in '%s' (%s)\n",
+                        src, dst, strerror(-rc));
+
+       rc = ct_copy_data(hcp, src, dst, src_fd, dst_fd, hai, hal_flags);
+       if (rc < 0) {
+               CT_ERROR("'%s' data copy failed to '%s' (%s)\n",
+                        src, dst, strerror(-rc));
+               goto fini_major;
+       }
+
+       CT_TRACE("'%s' data archived to '%s' done\n", src, dst);
+
+       /* attrs will remain on the MDS; no need to copy them, except possibly
+         for disaster recovery */
+       if (opt.o_copy_attrs) {
+               rc = ct_copy_attr(src, dst, src_fd, dst_fd);
+               if (rc < 0) {
+                       CT_ERROR("'%s' attr copy failed to '%s' (%s)\n",
+                                src, dst, strerror(-rc));
+                       rcf = rc;
+               }
+               CT_TRACE("'%s' attr file copied to archive '%s'\n",
+                        src, dst);
+       }
+
+       /* xattrs will remain on the MDS; no need to copy them, except possibly
+        for disaster recovery */
+       if (opt.o_copy_xattrs) {
+               rc = ct_copy_xattr(src, dst, src_fd, dst_fd, false);
+               if (rc < 0) {
+                       CT_ERROR("'%s' xattr copy failed to '%s' (%s)\n",
+                                src, dst, strerror(-rc));
+                       rcf = rcf ? rcf : rc;
+               }
+               CT_ERROR("'%s' xattr file copied to archive '%s'\n",
+                        src, dst);
+       }
+
+       if (rename_needed == true) {
+               char     tmp_src[PATH_MAX];
+               char     tmp_dst[PATH_MAX];
+
+               /* atomically replace old archived file */
+               ct_path_archive(src, sizeof(src), opt.o_hsm_root,
+                               &hai->hai_fid);
+               rc = rename(dst, src);
+               if (rc < 0) {
+                       CT_ERROR("'%s' renamed to '%s' failed (%s)\n", dst, src,
+                                strerror(errno));
+                       rc = -errno;
+                       goto fini_major;
+               }
+               /* rename lov file */
+               snprintf(tmp_src, sizeof(tmp_src), "%s.lov", src);
+               snprintf(tmp_dst, sizeof(tmp_dst), "%s.lov", dst);
+               rc = rename(tmp_dst, tmp_src);
+               if (rc < 0)
+                       CT_ERROR("'%s' renamed to '%s' failed (%s)\n",
+                                tmp_dst, tmp_src, strerror(errno));
+       }
+
+       if (opt.o_shadow_tree) {
+               /* Create a namespace of softlinks that shadows the original
+                * Lustre namespace.  This will only be current at
+                * time-of-archive (won't follow renames).
+                * WARNING: release won't kill these links; a manual
+                * cleanup of dead links would be required.
+                */
+               char             buf[PATH_MAX];
+               long long        recno = -1;
+               int              linkno = 0;
+               char            *ptr;
+               int              depth = 0;
+               int              sz;
+
+               sprintf(buf, DFID, PFID(&hai->hai_fid));
+               sprintf(src, "%s/shadow/", opt.o_hsm_root);
+
+               ptr = opt.o_hsm_root;
+               while (*ptr)
+                       (*ptr++ == '/') ? depth-- : 0;
+
+               rc = llapi_fid2path(opt.o_mnt, buf, src + strlen(src),
+                                   sizeof(src) - strlen(src), &recno, &linkno);
+               if (rc < 0) {
+                       CT_ERROR("'%s' fid2path failed (%s)\n", buf,
+                                strerror(-rc));
+                       rcf = rcf ? rcf : rc;
+                       goto fini_minor;
+               }
+
+               /* Figure out how many parent dirs to symlink back */
+               ptr = src;
+               while (*ptr)
+                       (*ptr++ == '/') ? depth++ : 0;
+               sprintf(buf, "..");
+               while (--depth > 1)
+                       strcat(buf, "/..");
+
+               ct_path_archive(dst, sizeof(dst), buf, &hai->hai_fid);
+
+               if (ct_mkdir_p(src)) {
+                       CT_ERROR("'%s' mkdir_p failed (%s)\n", src,
+                                strerror(errno));
+                       rcf = rcf ? rcf : -errno;
+                       goto fini_minor;
+               }
+               /* symlink already exists ? */
+               sz = readlink(src, buf, sizeof(buf));
+               if (sz >= 0) {
+                       buf[sz] = '\0';
+                       if (sz == 0 || strncmp(buf, dst, sz) != 0) {
+                               if (unlink(src) && errno != ENOENT) {
+                                       CT_ERROR("'%s' unlink symlink failed "
+                                                "(%s)\n", src,
+                                                strerror(errno));
+                                       rcf = rcf ? rcf : -errno;
+                                       goto fini_minor;
+                               /* unlink old symlink done */
+                               CT_TRACE("'%s' remove old symlink pointing"
+                                        " to '%s'\n", src, buf);
+                               }
+                       } else {
+                               /* symlink already ok */
+                               CT_TRACE("'%s' symlink already pointing"
+                                        " to '%s'\n", src, dst);
+                               rcf = 0;
+                               goto fini_minor;
+                       }
+               }
+               if (symlink(dst, src)) {
+                       CT_ERROR("'%s' symlink to '%s' failed (%s)\n", src, dst,
+                                strerror(errno));
+                       rcf = rcf ? rcf : -errno;
+                       goto fini_minor;
+               }
+               CT_TRACE("'%s' symlink to '%s' done\n", src, dst);
+       }
+fini_minor:
+       if (rcf)
+               err_minor++;
+       goto out;
+
+
+fini_major:
+       err_major++;
+
+       unlink(dst);
+       if (ct_is_retryable(rc))
+               ct_flags |= HP_FLAG_RETRY;
+
+       rcf = rc;
+
+out:
+       if (!(src_fd < 0))
+               close(src_fd);
+
+       if (!(dst_fd < 0))
+               close(dst_fd);
+
+       if (hcp != NULL)
+               rc = ct_fini(&hcp, hai, ct_flags, rcf);
+
+       return rc;
+}
+
+static int ct_restore(const struct hsm_action_item *hai, const long hal_flags)
+{
+       struct hsm_copyaction_private   *hcp = NULL;
+       char                             src[PATH_MAX];
+       char                             dst[PATH_MAX];
+       int                              rc;
+       int                              flags = 0;
+       int                              src_fd = -1;
+       int                              dst_fd = -1;
+       lustre_fid                       dfid;
+
+       rc = ct_begin(&hcp, hai);
+       if (rc)
+               goto fini;
+
+       /* we fill lustre so:
+        * source = lustre FID in the backend
+        * destination = data FID = volatile file
+        */
+
+       /* build backend file name from released file FID */
+       ct_path_archive(src, sizeof(src), opt.o_hsm_root, &hai->hai_fid);
+
+       /* get the FID of the volatile file */
+       rc = llapi_hsm_action_get_dfid(hcp, &dfid);
+       if (rc < 0) {
+               CT_ERROR("restoring "DFID", cannot get FID of created volatile"
+                        " file (%s)\n", PFID(&hai->hai_fid), strerror(-rc));
+               goto fini;
+       }
+
+       /* build volatile "file name", for messages */
+       snprintf(dst, sizeof(dst), "{VOLATILE}="DFID, PFID(&dfid));
+
+       CT_TRACE("'%s' restore data to '%s'\n", src, dst);
+
+       if (opt.o_dry_run) {
+               rc = 0;
+               goto fini;
+       }
+
+       src_fd = open(src, O_RDONLY | O_NOATIME | O_NONBLOCK | O_NOFOLLOW);
+       if (src_fd < 0) {
+               CT_ERROR("'%s' open for read failed (%s)\n", src,
+                        strerror(errno));
+               rc = -errno;
+               goto fini;
+       }
+
+       dst_fd = llapi_hsm_action_get_fd(hcp);
+
+       /* the layout cannot be allocated through .fid so we have to
+        * restore a layout */
+       rc = ct_restore_stripe(src, dst, dst_fd);
+       if (rc) {
+               CT_ERROR("'%s' cannot restore file striping info from '%s'"
+                        " (%s)\n", dst, src, strerror(-rc));
+               err_major++;
+               goto fini;
+       }
+
+       rc = ct_copy_data(hcp, src, dst, src_fd, dst_fd, hai, hal_flags);
+       if (rc < 0) {
+               CT_ERROR("'%s' data copy to '%s' failed (%s)\n", src, dst,
+                        strerror(-rc));
+               err_major++;
+               if (ct_is_retryable(rc))
+                       flags |= HP_FLAG_RETRY;
+               goto fini;
+       }
+
+       CT_TRACE("'%s' data restore done to %s\n", src, dst);
+
+fini:
+       if (hcp != NULL)
+               rc = ct_fini(&hcp, hai, flags, rc);
+
+       /* object swaping is done by cdt at copy end, so close of volatile file
+        * cannot be done before */
+       if (!(src_fd < 0))
+               close(src_fd);
+
+       if (!(dst_fd < 0))
+               close(dst_fd);
+
+       return rc;
+}
+
+static int ct_remove(const struct hsm_action_item *hai, const long hal_flags)
+{
+       struct hsm_copyaction_private   *hcp = NULL;
+       char                             dst[PATH_MAX];
+       int                              rc;
+
+       rc = ct_begin(&hcp, hai);
+       if (rc < 0)
+               goto fini;
+
+       ct_path_archive(dst, sizeof(dst), opt.o_hsm_root, &hai->hai_fid);
+
+       CT_TRACE("'%s' removed file\n", dst);
+
+       if (opt.o_dry_run) {
+               rc = 0;
+               goto fini;
+       }
+
+       rc = unlink(dst);
+       if (rc < 0) {
+               rc = -errno;
+               CT_ERROR("'%s' unlink failed (%s)\n", dst, strerror(-rc));
+               err_minor++;
+               goto fini;
+       }
+
+fini:
+       if (hcp != NULL)
+               rc = ct_fini(&hcp, hai, 0, rc);
+
+       return rc;
+}
+
+static int ct_report_error(const struct hsm_action_item *hai, int flags,
+                          int errval)
+{
+       struct hsm_copyaction_private   *hcp;
+       int                              rc;
+
+       rc = llapi_hsm_action_begin(&hcp, ctdata, hai, true);
+       if (rc)
+               return rc;
+
+       rc = llapi_hsm_action_end(&hcp, &hai->hai_extent, flags, abs(errval));
+
+       return rc;
+}
+
+static int ct_process_item(struct hsm_action_item *hai, const long hal_flags)
+{
+       int     rc = 0;
+
+       if (opt.o_verbose >= LLAPI_MSG_INFO || opt.o_dry_run) {
+               /* Print the original path */
+               char            fid[128];
+               char            path[PATH_MAX];
+               long long       recno = -1;
+               int             linkno = 0;
+
+               sprintf(fid, DFID, PFID(&hai->hai_fid));
+               CT_TRACE("'%s' action %s reclen %d, cookie="LPX64"\n",
+                        fid, hsm_copytool_action2name(hai->hai_action),
+                        hai->hai_len, hai->hai_cookie);
+               rc = llapi_fid2path(opt.o_mnt, fid, path,
+                                   sizeof(path), &recno, &linkno);
+               if (rc < 0)
+                       CT_ERROR("'%s' fid2path failed (%s)\n", fid,
+                                strerror(-rc));
+               else
+                       CT_TRACE("'%s' processing file\n", path);
+       }
+
+       switch (hai->hai_action) {
+       /* set err_major, minor inside these functions */
+       case HSMA_ARCHIVE:
+               rc = ct_archive(hai, hal_flags);
+               break;
+       case HSMA_RESTORE:
+               rc = ct_restore(hai, hal_flags);
+               break;
+       case HSMA_REMOVE:
+               rc = ct_remove(hai, hal_flags);
+               break;
+       case HSMA_CANCEL:
+               CT_TRACE("'%s' cancel not implemented\n", opt.o_mnt);
+               /* Don't report progress to coordinator for this cookie:
+                * the copy function will get ECANCELED when reporting
+                * progress. */
+               err_minor++;
+               return 0;
+               break;
+       default:
+               CT_ERROR("'%s' unknown action %d\n", opt.o_mnt,
+                        hai->hai_action);
+               err_minor++;
+               ct_report_error(hai, 0, -EINVAL);
+       }
+
+       return 0;
+}
+
+struct ct_th_data {
+       long                     hal_flags;
+       struct hsm_action_item  *hai;
+};
+
+static void *ct_thread(void *data)
+{
+       struct ct_th_data *cttd = data;
+       int rc;
+
+       rc = ct_process_item(cttd->hai, cttd->hal_flags);
+
+       free(cttd->hai);
+       free(cttd);
+       pthread_exit((void *)(intptr_t)rc);
+}
+
+static int ct_process_item_async(const struct hsm_action_item *hai,
+                                long hal_flags)
+{
+       pthread_attr_t           attr;
+       pthread_t                thread;
+       struct ct_th_data       *data;
+       int                      rc;
+
+       data = malloc(sizeof(*data));
+       if (data == NULL)
+               return -ENOMEM;
+
+       data->hai = malloc(hai->hai_len);
+       if (data->hai == NULL) {
+               free(data);
+               return -ENOMEM;
+       }
+
+       memcpy(data->hai, hai, hai->hai_len);
+       data->hal_flags = hal_flags;
+
+       rc = pthread_attr_init(&attr);
+       if (rc != 0) {
+               CT_ERROR("'%s' pthread_attr_init: %s\n", opt.o_mnt,
+                        strerror(rc));
+               free(data->hai);
+               free(data);
+               return -rc;
+       }
+
+       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+       rc = pthread_create(&thread, &attr, ct_thread, data);
+       if (rc != 0)
+               CT_ERROR("'%s' thread create: (%s)\n", opt.o_mnt, strerror(rc));
+
+       pthread_attr_destroy(&attr);
+       return 0;
+}
+
+static int ct_import_one(const char *src, const char *dst)
+{
+       char            newarc[PATH_MAX];
+       lustre_fid      fid;
+       struct stat     st;
+       int             rc;
+
+       CT_TRACE("'%s' importing from %s\n", dst, src);
+
+       if (stat(src, &st) < 0) {
+               CT_ERROR("'%s' stat failed (%s)\n", src, strerror(errno));
+               return -errno;
+       }
+
+       if (opt.o_dry_run)
+               return 0;
+
+       rc = llapi_hsm_import(dst,
+                             opt.o_archive_cnt ? opt.o_archive_id[0] : 0,
+                             &st, 0, 0, 0, 0, NULL, &fid);
+       if (rc < 0) {
+               CT_ERROR("'%s' import from '%s' failed (%s)\n", dst, src,
+                        strerror(-rc));
+               return -rc;
+       }
+
+       ct_path_archive(newarc, sizeof(newarc), opt.o_hsm_root, &fid);
+
+       rc = ct_mkdir_p(newarc);
+       if (rc < 0) {
+               CT_ERROR("'%s' mkdir_p failed (%s)\n", newarc, strerror(-rc));
+               err_major++;
+               return rc;
+
+       }
+
+       /* Lots of choices now: mv, ln, ln -s ? */
+       rc = link(src, newarc); /* hardlink */
+       if (rc < 0) {
+               CT_ERROR("'%s' link to '%s' failed (%s)\n", newarc, src,
+                        strerror(errno));
+               err_major++;
+               return -errno;
+       }
+       CT_TRACE("'%s' imported from '%s'=='%s'\n", dst, newarc, src);
+
+       return 0;
+}
+
+static char *path_concat(const char *dirname, const char *basename)
+{
+       char    *result;
+       int      dirlen = strlen(dirname);
+
+       result = malloc(dirlen + strlen(basename) + 2);
+       if (result == NULL)
+               return NULL;
+
+       memcpy(result, dirname, dirlen);
+       result[dirlen] = '/';
+       strcpy(result + dirlen + 1, basename);
+
+       return result;
+}
+
+static int ct_import_recurse(const char *relpath)
+{
+       DIR             *dir;
+       struct dirent    ent, *cookie = NULL;
+       char            *srcpath, *newpath;
+       int              rc;
+
+       if (relpath == NULL)
+               return -EINVAL;
+
+       srcpath = path_concat(opt.o_hsm_root, relpath);
+       if (srcpath == NULL) {
+               err_major++;
+               return -ENOMEM;
+       }
+
+       dir = opendir(srcpath);
+       if (dir == NULL) {
+               /* Not a dir, or error */
+               if (errno == ENOTDIR) {
+                       /* Single regular file case, treat o_dst as absolute
+                          final location. */
+                       rc = ct_import_one(srcpath, opt.o_dst);
+               } else {
+                       CT_ERROR("'%s' opendir failed (%s)\n", srcpath,
+                                strerror(errno));
+                       err_major++;
+                       rc = -errno;
+               }
+               free(srcpath);
+               return rc;
+       }
+       free(srcpath);
+
+       while (1) {
+               rc = readdir_r(dir, &ent, &cookie);
+               if (rc != 0) {
+                       CT_ERROR("'%s' readdir_r failed (%s)\n", relpath,
+                                strerror(errno));
+                       err_major++;
+                       rc = -errno;
+                       goto out;
+               } else if ((rc == 0) && (cookie == NULL)) {
+                       /* end of directory */
+                       break;
+               }
+
+               if (!strcmp(ent.d_name, ".") ||
+                   !strcmp(ent.d_name, ".."))
+                       continue;
+
+               /* New relative path */
+               newpath = path_concat(relpath, ent.d_name);
+               if (newpath == NULL) {
+                       err_major++;
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               if (ent.d_type == DT_DIR) {
+                       rc = ct_import_recurse(newpath);
+               } else {
+                       char src[PATH_MAX];
+                       char dst[PATH_MAX];
+
+                       sprintf(src, "%s/%s", opt.o_hsm_root, newpath);
+                       sprintf(dst, "%s/%s", opt.o_dst, newpath);
+                       /* Make the target dir in the Lustre fs */
+                       rc = ct_mkdir_p(dst);
+                       if (rc == 0) {
+                               /* Import the file */
+                               rc = ct_import_one(src, dst);
+                       } else {
+                               CT_ERROR("'%s' ct_mkdir_p failed (%s)\n", dst,
+                                        strerror(-rc));
+                               err_major++;
+                       }
+               }
+
+               if (rc != 0) {
+                       CT_ERROR("'%s' importing failed\n", newpath);
+                       if (err_major && opt.o_abort_on_error) {
+                               free(newpath);
+                               goto out;
+                       }
+               }
+               free(newpath);
+       }
+
+       rc = 0;
+out:
+       closedir(dir);
+       return rc;
+}
+
+static int ct_rebind_one(const lustre_fid *old_fid, const lustre_fid *new_fid)
+{
+       char src[PATH_MAX];
+       char dst[PATH_MAX];
+
+       CT_TRACE("rebind "DFID" to "DFID"\n", PFID(old_fid), PFID(new_fid));
+
+       ct_path_archive(src, sizeof(src), opt.o_hsm_root, old_fid);
+       ct_path_archive(dst, sizeof(dst), opt.o_hsm_root, new_fid);
+
+       if (!opt.o_dry_run) {
+               ct_mkdir_p(dst);
+               if (rename(src, dst)) {
+                       CT_ERROR("'%s' rename to '%s' failed (%s)\n", src, dst,
+                                strerror(errno));
+                       return -errno;
+               }
+               /* rename lov file */
+               strncat(src, ".lov", sizeof(src) - strlen(src) - 1);
+               strncat(dst, ".lov", sizeof(dst) - strlen(dst) - 1);
+               if (rename(src, dst))
+                       CT_ERROR("'%s' rename to '%s' failed (%s)\n", src, dst,
+                                strerror(errno));
+
+       }
+       return 0;
+}
+
+static bool fid_is_file(lustre_fid *fid)
+{
+       return fid_is_norm(fid) || fid_is_igif(fid);
+}
+
+static bool should_ignore_line(const char *line)
+{
+       int     i;
+
+       for (i = 0; line[i] != '\0'; i++) {
+               if (isspace(line[i]))
+                       continue;
+               else if (line[i] == '#')
+                       return true;
+               else
+                       return false;
+       }
+
+       return true;
+}
+
+static int ct_rebind_list(const char *list)
+{
+       int              rc;
+       FILE            *filp;
+       ssize_t          r;
+       char            *line = NULL;
+       size_t           line_size = 0;
+       unsigned int     nl = 0;
+       unsigned int     ok = 0;
+
+       filp = fopen(list, "r");
+       if (filp == NULL) {
+               CT_ERROR("'%s' open failed (%s)\n", list, strerror(errno));
+               return -errno;
+       }
+
+       /* each line consists of 2 FID */
+       while ((r = getline(&line, &line_size, filp)) != -1) {
+               lustre_fid      old_fid;
+               lustre_fid      new_fid;
+
+               /* Ignore empty and commented out ('#...') lines. */
+               if (should_ignore_line(line))
+                       continue;
+
+               nl++;
+
+               rc = sscanf(line, SFID" "SFID, RFID(&old_fid), RFID(&new_fid));
+               if (rc != 6 || !fid_is_file(&old_fid) ||
+                   !fid_is_file(&new_fid)) {
+                       CT_ERROR("'%s' FID expected near '%s', line %u\n",
+                                list, line, nl);
+                       err_major++;
+                       continue;
+               }
+
+               if (ct_rebind_one(&old_fid, &new_fid))
+                       err_major++;
+               else
+                       ok++;
+       }
+
+       fclose(filp);
+
+       if (line)
+               free(line);
+
+       /* return 0 if all rebinds were sucessful */
+       CT_TRACE("'%s' %u lines read, %u rebind successful\n", list, nl, ok);
+
+       return ok == nl ? 0 : -1;
+}
+
+static int ct_rebind(void)
+{
+       int     rc;
+
+       if (opt.o_dst) {
+               lustre_fid      old_fid;
+               lustre_fid      new_fid;
+
+               if (sscanf(opt.o_src, SFID, RFID(&old_fid)) != 3 ||
+                   !fid_is_file(&old_fid)) {
+                       CT_ERROR("'%s' invalid FID format\n", opt.o_src);
+                       return -EINVAL;
+               }
+
+               if (sscanf(opt.o_dst, SFID, RFID(&new_fid)) != 3 ||
+                   !fid_is_file(&new_fid)) {
+                       CT_ERROR("'%s' invalid FID format\n", opt.o_dst);
+                       return -EINVAL;
+               }
+
+               rc = ct_rebind_one(&old_fid, &new_fid);
+
+               return rc;
+       }
+
+       /* o_src is a list file */
+       rc = ct_rebind_list(opt.o_src);
+
+       return rc;
+}
+
+static int ct_dir_level_max(const char *dirpath, __u16 *sub_seqmax)
+{
+       DIR             *dir;
+       int              rc;
+       __u16            sub_seq;
+       struct dirent    ent, *cookie = NULL;
+
+       *sub_seqmax = 0;
+
+       dir = opendir(dirpath);
+       if (dir == NULL) {
+               rc = -errno;
+               CT_ERROR("'%s' failed to open directory (%s)\n", opt.o_hsm_root,
+                        strerror(-rc));
+               return rc;
+       }
+
+       while ((rc = readdir_r(dir, &ent, &cookie)) == 0) {
+               if (cookie == NULL)
+                       /* end of directory.
+                        * rc is 0 and seqmax contains the max value. */
+                       goto out;
+
+               if (!strcmp(ent.d_name, ".") || !strcmp(ent.d_name, ".."))
+                       continue;
+
+               if (sscanf(ent.d_name, "%hx", &sub_seq) != 1) {
+                       CT_TRACE("'%s' unexpected dirname format, "
+                                "skip entry.\n", ent.d_name);
+                       continue;
+               }
+               if (sub_seq > *sub_seqmax)
+                       *sub_seqmax = sub_seq;
+       }
+       rc = -errno;
+       CT_ERROR("'%s' readdir_r failed (%s)\n", dirpath, strerror(-rc));
+
+out:
+       closedir(dir);
+       return rc;
+}
+
+static int ct_max_sequence(void)
+{
+       int   rc, i;
+       char  path[PATH_MAX];
+       __u64 seq = 0;
+       __u16 subseq;
+
+       strncpy(path, opt.o_hsm_root, sizeof(path));
+       /* FID sequence is stored in top-level directory names:
+        * hsm_root/16bits (high weight)/16 bits/16 bits/16 bits (low weight).
+        */
+       for (i = 0; i < 4; i++) {
+               rc = ct_dir_level_max(path, &subseq);
+               if (rc != 0)
+                       return rc;
+               seq |= ((__u64)subseq << ((3 - i) * 16));
+               sprintf(path + strlen(path), "/%04x", subseq);
+       }
+
+       printf("max_sequence: %016Lx\n", seq);
+
+       return 0;
+}
+
+static void handler(int signal)
+{
+       psignal(signal, "exiting");
+       /* If we don't clean up upon interrupt, umount thinks there's a ref
+        * and doesn't remove us from mtab (EINPROGRESS). The lustre client
+        * does successfully unmount and the mount is actually gone, but the
+        * mtab entry remains. So this just makes mtab happier. */
+       llapi_hsm_copytool_unregister(&ctdata);
+       _exit(1);
+}
+
+/* Daemon waits for messages from the kernel; run it in the background. */
+static int ct_daemon(void)
+{
+       int     rc;
+
+       rc = daemon(1, 1);
+       if (rc < 0) {
+               CT_ERROR("%d: cannot start as daemon (%s)", getpid(),
+                        strerror(errno));
+               return -errno;
+       }
+
+       rc = llapi_hsm_copytool_register(&ctdata, opt.o_mnt, 0,
+                                        opt.o_archive_cnt, opt.o_archive_id);
+       if (rc < 0) {
+               CT_ERROR("%d: cannot start copytool interface: %s\n", getpid(),
+                        strerror(-rc));
+               return rc;
+       }
+
+       signal(SIGINT, handler);
+       signal(SIGTERM, handler);
+
+       while (1) {
+               struct hsm_action_list  *hal;
+               struct hsm_action_item  *hai;
+               int                      msgsize;
+               int                      i = 0;
+
+               CT_TRACE("%d: waiting for message from kernel\n", getpid());
+
+               rc = llapi_hsm_copytool_recv(ctdata, &hal, &msgsize);
+               if (rc == -ESHUTDOWN) {
+                       CT_TRACE("%d: shutting down", getpid());
+                       break;
+               } else if (rc == -EAGAIN) {
+                       continue; /* msg not for us */
+               } else if (rc < 0) {
+                       CT_WARN("%d: message receive: (%s)\n", getpid(),
+                               strerror(-rc));
+                       err_major++;
+                       if (opt.o_abort_on_error)
+                               break;
+                       else
+                               continue;
+               }
+
+               CT_TRACE("%d: copytool fs=%s archive#=%d item_count=%d\n",
+                        getpid(), hal->hal_fsname, hal->hal_archive_id,
+                        hal->hal_count);
+
+               if (strcmp(hal->hal_fsname, fs_name) != 0) {
+                       CT_ERROR("'%s' invalid fs name, expecting: %s\n",
+                                hal->hal_fsname, fs_name);
+                       err_major++;
+                       if (opt.o_abort_on_error)
+                               break;
+                       else
+                               continue;
+               }
+
+               hai = hai_zero(hal);
+               while (++i <= hal->hal_count) {
+                       if ((char *)hai - (char *)hal > msgsize) {
+                               CT_ERROR("'%s' item %d past end of message!\n",
+                                        opt.o_mnt, i);
+                               err_major++;
+                               rc = -EPROTO;
+                               break;
+                       }
+                       rc = ct_process_item_async(hai, hal->hal_flags);
+                       if (rc < 0)
+                               CT_ERROR("'%s' item %d process err: %s\n",
+                                        opt.o_mnt, i, strerror(-rc));
+                       if (opt.o_abort_on_error && err_major)
+                               break;
+                       hai = hai_next(hai);
+               }
+
+               llapi_hsm_action_list_free(&hal);
+
+               if (opt.o_abort_on_error && err_major)
+                       break;
+       }
+
+       llapi_hsm_copytool_unregister(&ctdata);
+
+       return rc;
+}
+
+static int ct_setup(void)
+{
+       int     rc;
+
+       /* set llapi message level */
+       llapi_msg_set_level(opt.o_verbose);
+
+       arc_fd = open(opt.o_hsm_root, O_DIRECTORY);
+       if (arc_fd < 0) {
+               CT_ERROR("cannot open archive at '%s': %s\n", opt.o_hsm_root,
+                        strerror(errno));
+               return -errno;
+       }
+
+       rc = llapi_search_fsname(opt.o_mnt, fs_name);
+       if (rc) {
+               CT_ERROR("cannot find a Lustre filesystem mounted at: %s\n",
+                        opt.o_mnt);
+               return -rc;
+       }
+
+       return rc;
+}
+
+static int ct_cleanup(void)
+{
+       if (arc_fd < 0)
+               return 0;
+
+       if (close(arc_fd) < 0) {
+               CT_ERROR("cannot close archive: %s.\n", strerror(errno));
+               return -errno;
+       }
+
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       int     rc;
+
+       strncpy(cmd_name, basename(argv[0]), sizeof(cmd_name));
+       rc = ct_parseopts(argc, argv);
+       if (rc) {
+               CT_ERROR("try '%s --help' for more information.\n", cmd_name);
+               return -rc;
+       }
+
+       ct_setup();
+
+       switch (opt.o_action) {
+       case CA_IMPORT:
+               rc = ct_import_recurse(opt.o_src);
+               break;
+       case CA_REBIND:
+               rc = ct_rebind();
+               break;
+       case CA_MAXSEQ:
+               rc = ct_max_sequence();
+               break;
+       case CA_DAEMON:
+               rc = ct_daemon();
+               break;
+       default:
+               CT_ERROR("no action specified. Try '%s --help' for more "
+                        "information.\n", cmd_name);
+               rc = -EINVAL;
+               break;
+       }
+
+       if (opt.o_action != CA_MAXSEQ)
+               CT_TRACE("%s(%d) finished, errs: %d major, %d minor, "
+                        "rc=%d (%s)\n", argv[0], getpid(), err_major,
+                        err_minor, rc, strerror(-rc));
+
+       ct_cleanup();
+
+       return -rc;
+}
+
index 818d54c..15cea80 100644 (file)
@@ -170,8 +170,8 @@ void llapi_printf(int level, char *fmt, ...)
 /**
  * size_units is to be initialized (or zeroed) by caller.
  */
-int parse_size(char *optarg, unsigned long long *size,
-               unsigned long long *size_units, int bytes_spec)
+int llapi_parse_size(const char *optarg, unsigned long long *size,
+                    unsigned long long *size_units, int bytes_spec)
 {
         char *end;
 
index 5d08949..72b64cc 100644 (file)
@@ -26,7 +26,8 @@
  *
  * Author: Aurelien Degremont <aurelien.degremont@cea.fr>
  * Author: JC Lafoucriere <jacques-charles.lafoucriere@cea.fr>
- * Author: Thomas leibovici <thomas.leibovici@cea.fr>
+ * Author: Thomas Leibovici <thomas.leibovici@cea.fr>
+ * Author: Henri Doreau <henri.doreau@cea.fr>
  */
 
 #include <stdlib.h>
 #define CT_PRIV_MAGIC 0xC0BE2001
 struct hsm_copytool_private {
        int                      magic;
-       char                    *fsname;
+       char                    *mnt;
+       int                      mnt_fd;
        lustre_kernelcomm        kuc;
        __u32                    archives;
 };
 
+#define CP_PRIV_MAGIC 0x19880429
+struct hsm_copyaction_private {
+       __u32                                    magic;
+       __s32                                    data_fd;
+       const struct hsm_copytool_private       *ct_priv;
+       struct hsm_copy                          copy;
+};
+
 #include <libcfs/libcfs.h>
 
 /** Register a copytool
  * \param[out] priv Opaque private control structure
- * \param fsname Lustre filesystem
+ * \param mnt Lustre filesystem mount point
  * \param flags Open flags, currently unused (e.g. O_NONBLOCK)
  * \param archive_count
  * \param archives Which archive numbers this copytool is responsible for
  */
-int llapi_hsm_copytool_start(struct hsm_copytool_private **priv, char *fsname,
-                            int flags, int archive_count, int *archives)
+int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
+                               const char *mnt, int flags, int archive_count,
+                               int *archives)
 {
        struct hsm_copytool_private     *ct;
        int                              rc;
@@ -92,12 +103,18 @@ int llapi_hsm_copytool_start(struct hsm_copytool_private **priv, char *fsname,
        if (ct == NULL)
                return -ENOMEM;
 
-       ct->fsname = malloc(strlen(fsname) + 1);
-       if (ct->fsname == NULL) {
+       ct->mnt_fd = open(mnt, O_DIRECTORY | O_RDONLY | O_NONBLOCK);
+       if (ct->mnt_fd < 0) {
+               rc = -errno;
+               goto out_err;
+       }
+
+       ct->mnt = strdup(mnt);
+       if (ct->mnt == NULL) {
                rc = -ENOMEM;
                goto out_err;
        }
-       strcpy(ct->fsname, fsname);
+
        ct->magic = CT_PRIV_MAGIC;
 
        /* no archives specified means "match all". */
@@ -126,8 +143,16 @@ int llapi_hsm_copytool_start(struct hsm_copytool_private **priv, char *fsname,
 
        /* Storing archive(s) in lk_data; see mdc_ioc_hsm_ct_start */
        ct->kuc.lk_data = ct->archives;
-       rc = root_ioctl(ct->fsname, LL_IOC_HSM_CT_START, &(ct->kuc), NULL,
-                       WANT_ERROR);
+       rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc);
+       if (rc < 0) {
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "cannot start copytool on '%s'", mnt);
+               goto out_err;
+       } else {
+               rc = 0;
+       }
+
        /* Only the kernel reference keeps the write side open */
        close(ct->kuc.lk_wfd);
        ct->kuc.lk_wfd = 0;
@@ -140,36 +165,44 @@ int llapi_hsm_copytool_start(struct hsm_copytool_private **priv, char *fsname,
 out_kuc:
        /* cleanup the kuc channel */
        libcfs_ukuc_stop(&ct->kuc);
+
 out_err:
-       if (ct->fsname)
-               free(ct->fsname);
+       if (!(ct->mnt_fd < 0))
+               close(ct->mnt_fd);
+       if (ct->mnt != NULL)
+               free(ct->mnt);
        free(ct);
        return rc;
 }
 
 /** Deregister a copytool
- * Note: under Linux, until llapi_hsm_copytool_fini is called (or the program is
- * killed), the libcfs module will be referenced and unremovable,
- * even after Lustre services stop.
+ * Note: under Linux, until llapi_hsm_copytool_unregister is called
+ * (or the program is killed), the libcfs module will be referenced
+ * and unremovable, even after Lustre services stop.
  */
-int llapi_hsm_copytool_fini(struct hsm_copytool_private **priv)
+int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv)
 {
        struct hsm_copytool_private *ct;
 
+       if (priv == NULL || *priv == NULL)
+               return -EINVAL;
+
        ct = *priv;
-       if (!ct || (ct->magic != CT_PRIV_MAGIC))
+       if (ct->magic != CT_PRIV_MAGIC)
                return -EINVAL;
 
        /* Tell the kernel to stop sending us messages */
        ct->kuc.lk_flags = LK_FLG_STOP;
-       root_ioctl(ct->fsname, LL_IOC_HSM_CT_START, &(ct->kuc), NULL, 0);
+       ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, &ct->kuc);
 
        /* Shut down the kernelcomms */
        libcfs_ukuc_stop(&ct->kuc);
 
-       free(ct->fsname);
+       close(ct->mnt_fd);
+       free(ct->mnt);
        free(ct);
        *priv = NULL;
+
        return 0;
 }
 
@@ -183,12 +216,13 @@ int llapi_hsm_copytool_fini(struct hsm_copytool_private **priv)
 int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
                            struct hsm_action_list **halh, int *msgsize)
 {
-       struct kuc_hdr                  *kuch;
-       struct hsm_action_list          *hal;
-       int                              rc = 0;
+       struct kuc_hdr          *kuch;
+       struct hsm_action_list  *hal;
+       int                      rc = 0;
 
-       if (!ct || (ct->magic != CT_PRIV_MAGIC))
+       if (ct == NULL || ct->magic != CT_PRIV_MAGIC)
                return -EINVAL;
+
        if (halh == NULL || msgsize == NULL)
                return -EINVAL;
 
@@ -256,132 +290,282 @@ out_free:
 }
 
 /** Release the action list when done with it. */
-int llapi_hsm_copytool_free(struct hsm_action_list **hal)
+void llapi_hsm_action_list_free(struct hsm_action_list **hal)
 {
        /* Reuse the llapi_changelog_free function */
-       return llapi_changelog_free((struct changelog_ext_rec **)hal);
+       llapi_changelog_free((struct changelog_ext_rec **)hal);
 }
 
+/** Get parent path from mount point and fid.
+ *
+ * \param mnt        Filesystem root path.
+ * \param fid        Object FID.
+ * \param parent     Destination buffer.
+ * \param parent_len Destination buffer size.
+ * \return 0 on success.
+ */
+static int fid_parent(const char *mnt, const lustre_fid *fid, char *parent,
+                     size_t parent_len)
+{
+       int              rc;
+       int              linkno = 0;
+       long long        recno = -1;
+       char             file[PATH_MAX];
+       char             strfid[FID_NOBRACE_LEN + 1];
+       char            *ptr;
+
+       snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(fid));
 
-/**
+       rc = llapi_fid2path(mnt, strfid, file, sizeof(file),
+                           &recno, &linkno);
+       if (rc < 0)
+               return rc;
+
+       /* fid2path returns a relative path */
+       rc = snprintf(parent, parent_len, "%s/%s", mnt, file);
+       if (rc >= parent_len)
+               return -ENAMETOOLONG;
+
+       /* remove file name */
+       ptr = strrchr(parent, '/');
+       if (ptr == NULL || ptr == parent) {
+               rc = -EINVAL;
+       } else {
+               *ptr = '\0';
+               rc = 0;
+       }
+
+       return rc;
+}
+
+/** Create the destination volatile file for a restore operation.
+ *
+ * \param hcp  Private copyaction handle.
+ * \return 0 on success.
+ */
+static int create_restore_volatile(struct hsm_copyaction_private *hcp)
+{
+       int                      rc;
+       int                      fd;
+       char                     parent[PATH_MAX + 1];
+       const char              *mnt = hcp->ct_priv->mnt;
+       struct hsm_action_item  *hai = &hcp->copy.hc_hai;
+
+       rc = fid_parent(mnt, &hai->hai_fid, parent, sizeof(parent));
+       if (rc < 0) {
+               /* fid_parent() failed, try to keep on going */
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "cannot get parent path to restore "DFID
+                           "using '%s'", PFID(&hai->hai_fid), mnt);
+               snprintf(parent, sizeof(parent), "%s", mnt);
+       }
+
+       fd = llapi_create_volatile_idx(parent, 0, O_LOV_DELAY_CREATE);
+       if (fd < 0)
+               return fd;
+
+       rc = llapi_fd2fid(fd, &hai->hai_dfid);
+       if (rc < 0)
+               goto err_cleanup;
+
+       hcp->data_fd = fd;
+
+       return 0;
+
+err_cleanup:
+       hcp->data_fd = -1;
+       close(fd);
+
+       return rc;
+}
+
+/** Start processing an HSM action.
  * Should be called by copytools just before starting handling a request.
  * It could be skipped if copytool only want to directly report an error,
- * \see llapi_hsm_copy_end().
+ * \see llapi_hsm_action_end().
  *
- * \param mnt   Mount point of the corresponding Lustre filesystem.
- * \param hai   The hsm_action_item describing the request they will handle.
- * \param copy  Updated by this call. Caller will passed it to
- *             llapi_hsm_copy_end()
+ * \param hcp      Opaque action handle to be passed to
+ *                 llapi_hsm_action_progress and llapi_hsm_action_end.
+ * \param ct       Copytool handle acquired at registration.
+ * \param hai      The hsm_action_item describing the request.
+ * \param is_error Whether this call is just to report an error.
  *
  * \return 0 on success.
  */
-int llapi_hsm_copy_start(char *mnt, struct hsm_copy *copy,
-                        const struct hsm_action_item *hai)
+int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
+                          const struct hsm_copytool_private *ct,
+                          const struct hsm_action_item *hai, bool is_error)
 {
-       int     fd;
-       int     rc;
+       struct hsm_copyaction_private   *hcp;
+       int                              rc;
+
+       hcp = calloc(1, sizeof(*hcp));
+       if (hcp == NULL)
+               return -ENOMEM;
 
-       if (memcpy(&copy->hc_hai, hai, sizeof(*hai)) == NULL)
-               RETURN(-EFAULT);
+       hcp->data_fd = -1;
+       hcp->ct_priv = ct;
+       hcp->copy.hc_hai = *hai;
+       hcp->copy.hc_hai.hai_len = sizeof(*hai);
 
-       rc = get_root_path(WANT_FD, NULL, &fd, mnt, -1);
-       if (rc)
-               return rc;
+       if (is_error)
+               goto ok_out;
 
-       rc = ioctl(fd, LL_IOC_HSM_COPY_START, copy);
-       /* If error, return errno value */
-       rc = rc ? -errno : 0;
-       close(fd);
+       if (hai->hai_action == HSMA_RESTORE) {
+               rc = create_restore_volatile(hcp);
+               if (rc < 0)
+                       goto err_out;
+       }
+
+       rc = ioctl(ct->mnt_fd, LL_IOC_HSM_COPY_START, &hcp->copy);
+       if (rc < 0) {
+               rc = -errno;
+               goto err_out;
+       }
+
+ok_out:
+       hcp->magic = CP_PRIV_MAGIC;
+       *phcp = hcp;
+       return 0;
+
+err_out:
+       if (!(hcp->data_fd < 0))
+               close(hcp->data_fd);
+
+       free(hcp);
 
        return rc;
 }
 
-/**
+/** Terminate an HSM action processing.
  * Should be called by copytools just having finished handling the request.
- *
- * \param mnt   Mount point of the corresponding Lustre filesystem.
- * \param copy  The element used when calling llapi_hsm_copy_start()
- * \param hp    A hsm_progress structure describing the final state of the
- *             request.
- *
- * There is a special case which can be used only when the copytool cannot
- * start the copy at all and want to directly return an error. In this case,
- * simply fill \a hp structure and set \a copy to NULL. It is useless to call
- * llapi_hsm_copy_start() in this case.
+ * \param hdl[in,out]  Handle returned by llapi_hsm_action_start.
+ * \param he[in]       The final range of copied data (for copy actions).
+ * \param errval[in]   The status code of the operation.
+ * \param flags[in]    The flags about the termination status (HP_FLAG_RETRY if
+ *                     the error is retryable).
  *
  * \return 0 on success.
  */
-int llapi_hsm_copy_end(char *mnt, struct hsm_copy *copy,
-                      const struct hsm_progress *hp)
+int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
+                        const struct hsm_extent *he, int flags, int errval)
 {
-       int     end_only = 0;
-       int     fd;
-       int     rc;
-
-       /* llapi_hsm_copy_start() was skipped, so alloc copy. It will
-        * only be used to give the needed progress information. */
-       if (copy == NULL) {
-               /* This is only ok if there is an error. */
-               if (hp->hp_errval == 0)
-                       return -EINVAL;
-
-               copy = (struct hsm_copy *)malloc(sizeof(*copy));
-               if (copy == NULL)
-                       return -ENOMEM;
-               end_only = 1;
-               copy->hc_hai.hai_cookie = hp->hp_cookie;
-               copy->hc_hai.hai_fid = hp->hp_fid;
-               copy->hc_hai.hai_action = HSMA_NONE;
+       struct hsm_copyaction_private   *hcp;
+       struct hsm_action_item          *hai;
+       int                              rc;
+
+       if (phcp == NULL || *phcp == NULL || he == NULL)
+               return -EINVAL;
+
+       hcp = *phcp;
+
+       if (hcp->magic != CP_PRIV_MAGIC)
+               return -EINVAL;
+
+       hai = &hcp->copy.hc_hai;
+
+       /* In some cases, like restore, 2 FIDs are used.
+        * Set the right FID to use here. */
+       if (hai->hai_action == HSMA_ARCHIVE || hai->hai_action == HSMA_RESTORE)
+               hai->hai_fid = hai->hai_dfid;
+
+       /* Fill the last missing data that will be needed by
+        * kernel to send a hsm_progress. */
+       hcp->copy.hc_flags  = flags;
+       hcp->copy.hc_errval = abs(errval);
+
+       hcp->copy.hc_hai.hai_extent = *he;
+
+       rc = ioctl(hcp->ct_priv->mnt_fd, LL_IOC_HSM_COPY_END, &hcp->copy);
+       if (rc) {
+               rc = -errno;
+               goto err_cleanup;
        }
 
-       /* Fill the last missing data that will be needed by kernel
-        * to send a hsm_progress. */
-       copy->hc_flags = hp->hp_flags;
-       copy->hc_errval = hp->hp_errval;
-       /* Update hai if it has changed since start */
-       copy->hc_hai.hai_extent = hp->hp_extent;
-       /* In some cases, like restore, 2 FIDs are used. hp knows the right FID
-        * to use here. */
-       copy->hc_hai.hai_fid = hp->hp_fid;
-
-       rc = get_root_path(WANT_FD, NULL, &fd, mnt, -1);
-       if (rc)
-               goto out_free;
+err_cleanup:
+       if (!(hcp->data_fd < 0))
+               close(hcp->data_fd);
 
-       rc = ioctl(fd, LL_IOC_HSM_COPY_END, copy);
-       /* If error, return errno value */
-       rc = rc ? -errno : 0;
-       close(fd);
+       free(hcp);
+       *phcp = NULL;
 
-out_free:
-       if (end_only)
-               free(copy);
+       return rc;
+}
+
+/** Notify a progress in processing an HSM action.
+ * \param hdl[in,out]   handle returned by llapi_hsm_action_start.
+ * \param he[in]        the range of copied data (for copy actions).
+ * \param hp_flags[in]  HSM progress flags.
+ * \return 0 on success.
+ */
+int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
+                             const struct hsm_extent *he, int hp_flags)
+{
+       int                      rc;
+       struct hsm_progress      hp;
+       struct hsm_action_item  *hai;
+
+       if (hcp == NULL || he == NULL)
+               return -EINVAL;
+
+       if (hcp->magic != CP_PRIV_MAGIC)
+               return -EINVAL;
+
+       hai = &hcp->copy.hc_hai;
+
+       memset(&hp, 0, sizeof(hp));
+
+       hp.hp_cookie = hai->hai_cookie;
+       hp.hp_flags  = hp_flags;
+
+       /* Progress is made on the data fid */
+       hp.hp_fid = hai->hai_dfid;
+       hp.hp_extent = *he;
+
+       rc = ioctl(hcp->ct_priv->mnt_fd, LL_IOC_HSM_PROGRESS, &hp);
+       if (rc < 0)
+               rc = -errno;
 
        return rc;
 }
 
+/** Get the fid of object to be used for copying data.
+ * @return error code if the action is not a copy operation.
+ */
+int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
+                             lustre_fid *fid)
+{
+       const struct hsm_action_item    *hai = &hcp->copy.hc_hai;
+
+       if (hcp->magic != CP_PRIV_MAGIC)
+               return -EINVAL;
+
+       if (hai->hai_action != HSMA_RESTORE && hai->hai_action != HSMA_ARCHIVE)
+               return -EINVAL;
+
+       *fid = hai->hai_dfid;
+
+       return 0;
+}
+
 /**
- * Copytool progress reporting.
+ * Get a file descriptor to be used for copying data. It's up to the
+ * caller to close the FDs obtained from this function.
  *
- * \a hp->hp_errval should be EAGAIN until action is completely finished.
- *
- * \return 0 on success, an error code otherwise.
+ * @retval a file descriptor on success.
+ * @retval a negative error code on failure.
  */
-int llapi_hsm_progress(char *mnt, struct hsm_progress *hp)
+int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp)
 {
-       int     fd;
-       int     rc;
+       const struct hsm_action_item    *hai = &hcp->copy.hc_hai;
 
-       rc = get_root_path(WANT_FD, NULL, &fd, mnt, -1);
-       if (rc)
-               return rc;
+       if (hcp->magic != CP_PRIV_MAGIC)
+               return -EINVAL;
 
-       rc = ioctl(fd, LL_IOC_HSM_PROGRESS, hp);
-       /* If error, save errno value */
-       rc = rc ? -errno : 0;
+       if (hai->hai_action != HSMA_RESTORE)
+               return -EINVAL;
 
-       close(fd);
-       return rc;
+       return dup(hcp->data_fd);
 }
 
 /**
@@ -599,17 +783,17 @@ struct hsm_user_request *llapi_hsm_user_request_alloc(int itemcount,
 /**
  * Send a HSM request to Lustre, described in \param request.
  *
- * This request should be allocated with llapi_hsm_user_request_alloc().
+ * \param path   Fullpath to the file to operate on.
+ * \param request The request, allocated with llapi_hsm_user_request_alloc().
  *
- * \param mnt Should be the Lustre moint point.
  * \return 0 on success, an error code otherwise.
  */
-int llapi_hsm_request(char *mnt, struct hsm_user_request *request)
+int llapi_hsm_request(const char *path, const struct hsm_user_request *request)
 {
        int rc;
        int fd;
 
-       rc = get_root_path(WANT_FD, NULL, &fd, mnt, -1);
+       rc = get_root_path(WANT_FD, NULL, &fd, (char *)path, -1);
        if (rc)
                return rc;