Whamcloud - gitweb
LU-3345 llapi: add user space method for lov_user_md
[fs/lustre-release.git] / lustre / utils / lfs.c
index df60d24..f258453 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #include <lustre/lustreapi.h>
 
 #include <libcfs/libcfsutil.h>
+#include <obd.h>
+#include <obd_lov.h>
 #include "obdctl.h"
 
 /* all functions */
 static int lfs_setstripe(int argc, char **argv);
 static int lfs_find(int argc, char **argv);
 static int lfs_getstripe(int argc, char **argv);
+static int lfs_getdirstripe(int argc, char **argv);
+static int lfs_setdirstripe(int argc, char **argv);
+static int lfs_rmentry(int argc, char **argv);
 static int lfs_osts(int argc, char **argv);
 static int lfs_mdts(int argc, char **argv);
 static int lfs_df(int argc, char **argv);
@@ -104,33 +109,69 @@ static int lfs_changelog_clear(int argc, char **argv);
 static int lfs_fid2path(int argc, char **argv);
 static int lfs_path2fid(int argc, char **argv);
 static int lfs_data_version(int argc, char **argv);
+static int lfs_hsm_state(int argc, char **argv);
+static int lfs_hsm_set(int argc, char **argv);
+static int lfs_hsm_clear(int argc, char **argv);
+static int lfs_hsm_action(int argc, char **argv);
+static int lfs_hsm_archive(int argc, char **argv);
+static int lfs_hsm_restore(int argc, char **argv);
+static int lfs_hsm_release(int argc, char **argv);
+static int lfs_hsm_remove(int argc, char **argv);
+static int lfs_hsm_cancel(int argc, char **argv);
+static int lfs_swap_layouts(int argc, char **argv);
+
+/* Build the option synopsis and option descriptions shared by the help
+ * text of several commands. _cmd and _tgt must be string literals: they
+ * are concatenated into the usage line as the command name and as the
+ * trailing target argument respectively. */
+#define SETSTRIPE_USAGE(_cmd, _tgt) \
+       "usage: "_cmd" [--stripe-count|-c <stripe_count>]\n"\
+       "                 [--stripe-index|-i <start_ost_idx>]\n"\
+       "                 [--stripe-size|-S <stripe_size>]\n"\
+       "                 [--pool|-p <pool_name>]\n"\
+       "                 [--block|-b] "_tgt"\n"\
+       "\tstripe_size:  Number of bytes on each OST (0 filesystem default)\n"\
+       "\t              Can be specified with k, m or g (in KB, MB and GB\n"\
+       "\t              respectively)\n"\
+       "\tstart_ost_idx: OST index of first stripe (-1 default)\n"\
+       "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n"\
+       "\tpool_name:    Name of OST pool to use (default none)\n"\
+       "\tblock:        Block file access during data migration"
 
 /* all available commands */
 command_t cmdlist[] = {
-        {"setstripe", lfs_setstripe, 0,
-         "Create a new file with a specific striping pattern or\n"
-         "set the default striping pattern on an existing directory or\n"
-         "delete the default striping pattern from an existing directory\n"
-         "usage: setstripe [--stripe-count|-c <stripe_count>]\n"
-         "                 [--stripe-index|-i <start_ost_idx>]\n"
-         "                 [--stripe-size|-S <stripe_size>]\n"
-         "                 [--pool|-p <pool_name>] <directory|filename>\n"
-         " or\n"
-         "       setstripe -d <directory>   (to delete default striping)\n"
-         "\tstripe_size:  Number of bytes on each OST (0 filesystem default)\n"
-         "\t              Can be specified with k, m or g (in KB, MB and GB\n"
-         "\t              respectively)\n"
-         "\tstart_ost_idx: OST index of first stripe (-1 default)\n"
-         "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n"
-         "\tpool_name:    Name of OST pool to use (default none)"},
-        {"getstripe", lfs_getstripe, 0,
-         "To list the striping info for a given file or files in a\n"
-         "directory or recursively for all files in a directory tree.\n"
-         "usage: getstripe [--ost|-O <uuid>] [--quiet | -q] [--verbose | -v]\n"
-         "                 [--stripe-count|-c] [--stripe-index|-i]\n"
-         "                 [--pool|-p] [--stripe-size|-S] [--directory|-d]\n"
-         "                 [--mdt-index|-M] [--recursive|-r] [--raw|-R]\n"
-         "                 <directory|filename> ..."},
+       {"setstripe", lfs_setstripe, 0,
+        "Create a new file with a specific striping pattern or\n"
+        "set the default striping pattern on an existing directory or\n"
+        "delete the default striping pattern from an existing directory\n"
+        "usage: setstripe -d <directory>   (to delete default striping)\n"\
+        " or\n"
+        SETSTRIPE_USAGE("setstripe", "<directory|filename>")},
+       {"getstripe", lfs_getstripe, 0,
+        "To list the striping info for a given file or files in a\n"
+        "directory or recursively for all files in a directory tree.\n"
+        "usage: getstripe [--ost|-O <uuid>] [--quiet | -q] [--verbose | -v]\n"
+        "                 [--stripe-count|-c] [--stripe-index|-i]\n"
+        "                 [--pool|-p] [--stripe-size|-S] [--directory|-d]\n"
+        "                 [--mdt-index|-M] [--recursive|-r] [--raw|-R]\n"
+        "                 <directory|filename> ..."},
+       {"setdirstripe", lfs_setdirstripe, 0,
+        "To create a remote directory on a specified MDT.\n"
+        "usage: setdirstripe <--index|-i mdt_index> <dir>\n"
+        "\tmdt_index:    MDT index of first stripe\n"},
+       {"getdirstripe", lfs_getdirstripe, 0,
+        "To list the striping info for a given directory\n"
+        "or recursively for all directories in a directory tree.\n"
+        "usage: getdirstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v]\n"
+        "               [--count|-c ] [--index|-i ] [--raw|-R]\n"
+        "               [--recursive | -r] <dir> ..."},
+       {"mkdir", lfs_setdirstripe, 0,
+        "To create a remote directory on a specified MDT. And this can only\n"
+        "be done on MDT0 by administrator.\n"
+        "usage: mkdir <--index|-i mdt_index> <dir>\n"
+        "\tmdt_index:    MDT index of the remote directory.\n"},
+       {"rm_entry", lfs_rmentry, 0,
+        "To remove the name entry of the remote directory. Note: This\n"
+        "command will only delete the name entry, i.e. the remote directory\n"
+        "will become inaccessable after this command. This can only be done\n"
+        "by the administrator\n"
+        "usage: rm_entry <dir>\n"},
         {"pool_list", lfs_poollist, 0,
          "List pools or pool OSTs\n"
          "usage: pool_list <fsname>[.<pool>] | <pathname>\n"},
@@ -238,6 +279,38 @@ command_t cmdlist[] = {
          "usage: path2fid <path>"},
         {"data_version", lfs_data_version, 0, "Display file data version for "
          "a given path.\n" "usage: data_version [-n] <path>"},
+       {"hsm_state", lfs_hsm_state, 0, "Display the HSM information (states, "
+        "undergoing actions) for given files.\n usage: hsm_state <file> ..."},
+       {"hsm_set", lfs_hsm_set, 0, "Set HSM user flag on specified files.\n"
+        "usage: hsm_set [--norelease] [--noarchive] [--dirty] [--exists] "
+        "[--archived] [--lost] <file> ..."},
+       {"hsm_clear", lfs_hsm_clear, 0, "Clear HSM user flag on specified "
+        "files.\n"
+        "usage: hsm_clear [--norelease] [--noarchive] [--dirty] [--exists] "
+        "[--archived] [--lost] <file> ..."},
+       {"hsm_action", lfs_hsm_action, 0, "Display current HSM request for "
+        "given files.\n" "usage: hsm_action <file> ..."},
+       {"hsm_archive", lfs_hsm_archive, 0,
+        "Archive file to external storage.\n"
+        "usage: hsm_archive [--filelist FILELIST] [--data DATA] [--archive NUM] "
+        "<file> ..."},
+       {"hsm_restore", lfs_hsm_restore, 0,
+        "Restore file from external storage.\n"
+        "usage: hsm_restore [--filelist FILELIST] [--data DATA] <file> ..."},
+       {"hsm_release", lfs_hsm_release, 0,
+        "Release files from Lustre.\n"
+        "usage: hsm_release [--filelist FILELIST] [--data DATA] <file> ..."},
+       {"hsm_remove", lfs_hsm_remove, 0,
+        "Remove file copy from external storage.\n"
+        "usage: hsm_remove [--filelist FILELIST] [--data DATA] <file> ..."},
+       {"hsm_cancel", lfs_hsm_cancel, 0,
+        "Cancel requests related to specified files.\n"
+        "usage: hsm_cancel [--filelist FILELIST] [--data DATA] <file> ..."},
+       {"swap_layouts", lfs_swap_layouts, 0, "Swap layouts between 2 files.\n"
+        "usage: swap_layouts <path1> <path2>"},
+       {"migrate", lfs_setstripe, 0, "migrate file from one layout to "
+        "another (may be not safe with concurent writes).\n"
+        SETSTRIPE_USAGE("migrate  ", "<filename>")},
         {"help", Parser_help, 0, "help"},
         {"exit", Parser_quit, 0, "quit"},
         {"quit", Parser_quit, 0, "quit"},
@@ -259,70 +332,292 @@ static int isnumber(const char *str)
         return 1;
 }
 
+/* lfs_migrate() flag: take a group lock on the file while its data is
+ * being copied (set by the --block option in migrate mode) */
+#define MIGRATION_BLOCKS 1
+
+/*
+ * Migrate the data of file "name" to a new layout.
+ *
+ * A volatile file is created in the directory of "name" with the requested
+ * striping (stripe_size, stripe_offset, stripe_count, stripe_pattern and
+ * pool_name are passed unchanged to llapi_file_open_pool()). The content
+ * of "name" is copied into it and the layouts of the two files are then
+ * swapped, provided the data version of "name" did not change meanwhile.
+ *
+ * If MIGRATION_BLOCKS is set in migration_flags, a group lock with a
+ * random id is held on "name" for the duration of the copy.
+ *
+ * Returns 0 on success and a negative errno-style value on failure.
+ */
+static int lfs_migrate(char *name, unsigned long long stripe_size,
+                      int stripe_offset, int stripe_count,
+                      int stripe_pattern, char *pool_name,
+                      __u64 migration_flags)
+{
+       int                      fd, fdv;
+       char                     volatile_file[PATH_MAX];
+       char                     parent[PATH_MAX];
+       char                    *ptr;
+       int                      rc;
+       __u64                    dv1;
+       struct lov_user_md      *lum = NULL;
+       int                      lumsz;
+       int                      bufsz;
+       void                    *buf = NULL;
+       int                      rsize, wsize;
+       __u64                    rpos, wpos, bufoff;
+       int                      gid = 0, sz;
+       int                      have_gl = 0;
+
+       /* find the right size for the IO and allocate the buffer */
+       lumsz = lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
+       lum = malloc(lumsz);
+       if (lum == NULL) {
+               rc = -ENOMEM;
+               goto free;
+       }
+
+       rc = llapi_file_get_stripe(name, lum);
+       /* A failure here can have many causes and some of them are not
+        * real errors (eg: no stripe). In case of a real error, a later
+        * call will fail with better error reporting.
+        * A zero stripe size also falls back to the default size: with an
+        * empty buffer the first read() below would return 0, be taken for
+        * EOF, and an empty volatile file would be swapped in. */
+       if (rc < 0 || lum->lmm_stripe_size == 0)
+               bufsz = 1024*1024;
+       else
+               bufsz = lum->lmm_stripe_size;
+       rc = posix_memalign(&buf, getpagesize(), bufsz);
+       if (rc != 0) {
+               /* posix_memalign() returns a positive error number */
+               rc = -rc;
+               goto free;
+       }
+
+       if (migration_flags & MIGRATION_BLOCKS) {
+               /* generate a random id for the grouplock */
+               fd = open("/dev/urandom", O_RDONLY);
+               if (fd == -1) {
+                       rc = -errno;
+                       fprintf(stderr, "cannot open /dev/urandom (%s)\n",
+                               strerror(-rc));
+                       goto free;
+               }
+               sz = sizeof(gid);
+               rc = read(fd, &gid, sz);
+               /* errno is saved before close() can overwrite it; a short
+                * read does not set errno, so it is reported as -EIO
+                * rather than as 0, which the caller reads as success */
+               if (rc < 0)
+                       rc = -errno;
+               else if (rc < sz)
+                       rc = -EIO;
+               else
+                       rc = 0;
+               close(fd);
+               if (rc != 0) {
+                       fprintf(stderr, "cannot read %d bytes from"
+                               " /dev/urandom (%s)\n", sz, strerror(-rc));
+                       goto free;
+               }
+       }
+
+       /* search for file directory pathname */
+       if (strlen(name) >= sizeof(parent)) {
+               /* name would not fit in parent[] */
+               rc = -ENAMETOOLONG;
+               fprintf(stderr, "cannot migrate %s (%s)\n",
+                       name, strerror(-rc));
+               goto free;
+       }
+       strcpy(parent, name);
+       ptr = strrchr(parent, '/');
+       if (ptr == NULL) {
+               /* no directory component: use the current directory */
+               if (getcwd(parent, sizeof(parent)) == NULL) {
+                       rc = -errno;
+                       goto free;
+               }
+       } else {
+               if (ptr == parent)
+                       strcpy(parent, "/");
+               else
+                       *ptr = '\0';
+       }
+       rc = snprintf(volatile_file, sizeof(volatile_file), "%s/%s::",
+                     parent, LUSTRE_VOLATILE_HDR);
+       if (rc < 0 || rc >= (int)sizeof(volatile_file)) {
+               /* the volatile file name was truncated */
+               rc = -ENAMETOOLONG;
+               fprintf(stderr, "cannot create volatile file in %s (%s)\n",
+                       parent, strerror(-rc));
+               goto free;
+       }
+
+       /* create, open a volatile file, use caching (ie no directio) */
+       /* exclusive create is not needed because volatile files cannot
+        * conflict on name by construction */
+       fdv = llapi_file_open_pool(volatile_file, O_CREAT | O_WRONLY,
+                                  0644, stripe_size, stripe_offset,
+                                  stripe_count, stripe_pattern, pool_name);
+       if (fdv < 0) {
+               rc = fdv;
+               fprintf(stderr, "cannot create volatile file in %s (%s)\n",
+                       parent, strerror(-rc));
+               goto free;
+       }
+
+       /* open file, direct io */
+       /* even if the file is only read, WR mode is needed to allow
+        * layout swap on fd */
+       fd = open(name, O_RDWR | O_DIRECT);
+       if (fd == -1) {
+               rc = -errno;
+               fprintf(stderr, "cannot open %s (%s)\n", name, strerror(-rc));
+               close(fdv);
+               goto free;
+       }
+
+       /* get file data version */
+       rc = llapi_get_data_version(fd, &dv1, 0);
+       if (rc != 0) {
+               fprintf(stderr, "cannot get dataversion on %s (%s)\n",
+                       name, strerror(-rc));
+               goto error;
+       }
+
+       if (migration_flags & MIGRATION_BLOCKS) {
+               /* take group lock to limit concurrent access
+                * this will be no more needed when exclusive access will
+                * be implemented (see LU-2919) */
+               /* group lock is taken after data version read because it
+                * blocks data version call */
+               if (ioctl(fd, LL_IOC_GROUP_LOCK, gid) == -1) {
+                       rc = -errno;
+                       fprintf(stderr, "cannot get group lock on %s (%s)\n",
+                               name, strerror(-rc));
+                       goto error;
+               }
+               have_gl = 1;
+       }
+
+       /* copy data */
+       rpos = 0;
+       wpos = 0;
+       bufoff = 0;
+       rsize = -1;
+       do {
+               /* read new data only if we have written all
+                * previously read data */
+               if (wpos == rpos) {
+                       rsize = read(fd, buf, bufsz);
+                       if (rsize < 0) {
+                               rc = -errno;
+                               fprintf(stderr, "read failed on %s"
+                                       " (%s)\n", name,
+                                       strerror(-rc));
+                               goto error;
+                       }
+                       rpos += rsize;
+                       bufoff = 0;
+               }
+               /* eof ? */
+               if (rsize == 0)
+                       break;
+               /* write() may be partial: bufoff tracks how much of the
+                * current buffer has already been written */
+               wsize = write(fdv, (char *)buf + bufoff, rpos - wpos);
+               if (wsize < 0) {
+                       rc = -errno;
+                       fprintf(stderr, "write failed on volatile"
+                               " for %s (%s)\n", name, strerror(-rc));
+                       goto error;
+               }
+               wpos += wsize;
+               bufoff += wsize;
+       } while (1);
+
+       /* flush data; do not swap layouts if the flush failed, the copy
+        * may be incomplete */
+       if (fsync(fdv) == -1) {
+               rc = -errno;
+               fprintf(stderr, "cannot flush volatile file for %s (%s)\n",
+                       name, strerror(-rc));
+               goto error;
+       }
+
+       if (migration_flags & MIGRATION_BLOCKS) {
+               /* give back group lock; a failure is reported but does
+                * not stop the migration */
+               if (ioctl(fd, LL_IOC_GROUP_UNLOCK, gid) == -1)
+                       fprintf(stderr, "cannot put group lock on %s (%s)\n",
+                               name, strerror(errno));
+               have_gl = 0;
+       }
+
+       /* swap layouts
+        * for a migration we need to:
+        * - check data version on file did not change
+        * - keep file mtime
+        * - keep file atime
+        */
+       rc = llapi_fswap_layouts(fd, fdv, dv1, 0,
+                                SWAP_LAYOUTS_CHECK_DV1 |
+                                SWAP_LAYOUTS_KEEP_MTIME |
+                                SWAP_LAYOUTS_KEEP_ATIME);
+       if (rc == -EAGAIN)
+               fprintf(stderr, "file dataversion for %s has changed"
+                               " during copy, migration is aborted\n",
+                       name);
+       else if (rc != 0)
+               fprintf(stderr, "cannot swap layouts between %s and "
+                       "a volatile file (%s)\n",
+                       name, strerror(-rc));
+
+error:
+       /* give back group lock if it is still held */
+       if ((migration_flags & MIGRATION_BLOCKS) && have_gl &&
+           (ioctl(fd, LL_IOC_GROUP_UNLOCK, gid) == -1)) {
+               /* we keep in rc the original error */
+               fprintf(stderr, "cannot put group lock on %s (%s)\n",
+                       name, strerror(errno));
+       }
+
+       close(fdv);
+       close(fd);
+free:
+       /* free(NULL) is a no-op, no need to test the pointers */
+       free(lum);
+       free(buf);
+       return rc;
+}
+
 /* functions */
 static int lfs_setstripe(int argc, char **argv)
 {
-        char *fname;
-        int result;
-        unsigned long long st_size;
-        int  st_offset, st_count;
-        char *end;
-        int c;
-        int delete = 0;
-        char *stripe_size_arg = NULL;
-        char *stripe_off_arg = NULL;
-        char *stripe_count_arg = NULL;
-        char *pool_name_arg = NULL;
-        unsigned long long size_units = 1;
-
-        struct option long_opts[] = {
+       char                    *fname;
+       int                      result;
+       unsigned long long       st_size;
+       int                      st_offset, st_count;
+       char                    *end;
+       int                      c;
+       int                      delete = 0;
+       char                    *stripe_size_arg = NULL;
+       char                    *stripe_off_arg = NULL;
+       char                    *stripe_count_arg = NULL;
+       char                    *pool_name_arg = NULL;
+       unsigned long long       size_units = 1;
+       int                      migrate_mode = 0;
+       __u64                    migration_flags = 0;
+
+       struct option            long_opts[] = {
+               /* valid only in migrate mode */
+               {"block",        no_argument,       0, 'b'},
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --count option"
 #else
-                /* This formerly implied "stripe-count", but was explicitly
-                 * made "stripe-count" for consistency with other options,
-                 * and to separate it from "mdt-count" when DNE arrives. */
-                {"count",        required_argument, 0, 'c'},
+               /* This formerly implied "stripe-count", but was explicitly
+                * made "stripe-count" for consistency with other options,
+                * and to separate it from "mdt-count" when DNE arrives. */
+               {"count",        required_argument, 0, 'c'},
 #endif
-                {"stripe-count", required_argument, 0, 'c'},
-                {"stripe_count", required_argument, 0, 'c'},
-                {"delete",       no_argument,       0, 'd'},
+               {"stripe-count", required_argument, 0, 'c'},
+               {"stripe_count", required_argument, 0, 'c'},
+               {"delete",       no_argument,       0, 'd'},
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --index option"
 #else
-                /* This formerly implied "stripe-index", but was explicitly
-                 * made "stripe-index" for consistency with other options,
-                 * and to separate it from "mdt-index" when DNE arrives. */
-                {"index",        required_argument, 0, 'i'},
+               /* This formerly implied "stripe-index", but was explicitly
+                * made "stripe-index" for consistency with other options,
+                * and to separate it from "mdt-index" when DNE arrives. */
+               {"index",        required_argument, 0, 'i'},
 #endif
-                {"stripe-index", required_argument, 0, 'i'},
-                {"stripe_index", required_argument, 0, 'i'},
+               {"stripe-index", required_argument, 0, 'i'},
+               {"stripe_index", required_argument, 0, 'i'},
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --offset option"
 #else
-                /* This formerly implied "stripe-index", but was confusing
-                 * with "file offset" (which will eventually be needed for
-                 * with different layouts by offset), so deprecate it. */
-                {"offset",       required_argument, 0, 'o'},
+               /* This formerly implied "stripe-index", but was confusing
+                * with "file offset" (which will eventually be needed for
+                * with different layouts by offset), so deprecate it. */
+               {"offset",       required_argument, 0, 'o'},
 #endif
-                {"pool",         required_argument, 0, 'p'},
+               {"pool",         required_argument, 0, 'p'},
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --size option"
 #else
-                /* This formerly implied "--stripe-size", but was confusing
-                 * with "lfs find --size|-s", which means "file size", so use
-                 * the consistent "--stripe-size|-S" for all commands. */
-                {"size",         required_argument, 0, 's'},
+               /* This formerly implied "--stripe-size", but was confusing
+                * with "lfs find --size|-s", which means "file size", so use
+                * the consistent "--stripe-size|-S" for all commands. */
+               {"size",         required_argument, 0, 's'},
 #endif
-                {"stripe-size",  required_argument, 0, 'S'},
-                {"stripe_size",  required_argument, 0, 'S'},
-                {0, 0, 0, 0}
-        };
+               {"stripe-size",  required_argument, 0, 'S'},
+               {"stripe_size",  required_argument, 0, 'S'},
+               {0, 0, 0, 0}
+       };
 
         st_size = 0;
         st_offset = -1;
         st_count = 0;
 
+       if (strcmp(argv[0], "migrate") == 0)
+               migrate_mode = 1;
+
 #if LUSTRE_VERSION < OBD_OCD_VERSION(2,4,50,0)
         if (argc == 5 && argv[1][0] != '-' &&
             isnumber(argv[2]) && isnumber(argv[3]) && isnumber(argv[4])) {
@@ -341,6 +636,14 @@ static int lfs_setstripe(int argc, char **argv)
                 case 0:
                         /* Long options. */
                         break;
+               case 'b':
+                       if (migrate_mode == 0) {
+                               fprintf(stderr, "--block is valid only for"
+                                               " migrate mode");
+                               return CMD_HELP;
+                       }
+                       migration_flags |= MIGRATION_BLOCKS;
+                       break;
                 case 'c':
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --count option"
@@ -439,18 +742,26 @@ static int lfs_setstripe(int argc, char **argv)
                 }
         }
 
-        do {
-                result = llapi_file_create_pool(fname, st_size, st_offset,
-                                                st_count, 0, pool_name_arg);
-                if (result) {
-                        fprintf(stderr,"error: %s: create stripe file '%s' "
-                                "failed\n", argv[0], fname);
-                        break;
-                }
-                fname = argv[++optind];
-        } while (fname != NULL);
-
-        return result;
+       do {
+               if (migrate_mode)
+                       result = lfs_migrate(fname, st_size, st_offset,
+                                            st_count, 0, pool_name_arg,
+                                            migration_flags);
+               else
+                       result = llapi_file_create_pool(fname, st_size,
+                                                       st_offset, st_count,
+                                                       0, pool_name_arg);
+               if (result) {
+                       fprintf(stderr,
+                               "error: %s: %s stripe file '%s' failed\n",
+                               argv[0], migrate_mode ? "migrate" : "create",
+                               fname);
+                       break;
+               }
+               fname = argv[++optind];
+       } while (fname != NULL);
+
+       return result;
 }
 
 static int lfs_poollist(int argc, char **argv)
@@ -882,7 +1193,8 @@ err:
         return ret;
 }
 
-static int lfs_getstripe(int argc, char **argv)
+static int lfs_getstripe_internal(int argc, char **argv,
+                                 struct find_param *param)
 {
         struct option long_opts[] = {
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
@@ -937,127 +1249,126 @@ static int lfs_getstripe(int argc, char **argv)
                 {0, 0, 0, 0}
         };
         int c, rc;
-        struct find_param param = { 0 };
 
-        param.maxdepth = 1;
-        optind = 0;
-        while ((c = getopt_long(argc, argv, "cdghiMoO:pqrRsSv",
-                                long_opts, NULL)) != -1) {
-                switch (c) {
-                case 'O':
-                        if (param.obduuid) {
-                                fprintf(stderr,
-                                        "error: %s: only one obduuid allowed",
-                                        argv[0]);
-                                return CMD_HELP;
-                        }
-                        param.obduuid = (struct obd_uuid *)optarg;
-                        break;
-                case 'q':
-                        param.quiet++;
-                        break;
-                case 'd':
-                        param.maxdepth = 0;
-                        break;
-                case 'r':
-                        param.recursive = 1;
-                        break;
-                case 'v':
-                        param.verbose = VERBOSE_ALL | VERBOSE_DETAIL;
-                        break;
-                case 'c':
+       param->maxdepth = 1;
+       optind = 0;
+       while ((c = getopt_long(argc, argv, "cdghiMoO:pqrRsSv",
+                               long_opts, NULL)) != -1) {
+               switch (c) {
+               case 'O':
+                       if (param->obduuid) {
+                               fprintf(stderr,
+                                       "error: %s: only one obduuid allowed",
+                                       argv[0]);
+                               return CMD_HELP;
+                       }
+                       param->obduuid = (struct obd_uuid *)optarg;
+                       break;
+               case 'q':
+                       param->quiet++;
+                       break;
+               case 'd':
+                       param->maxdepth = 0;
+                       break;
+               case 'r':
+                       param->recursive = 1;
+                       break;
+               case 'v':
+                       param->verbose = VERBOSE_ALL | VERBOSE_DETAIL;
+                       break;
+               case 'c':
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --count option"
 #elif LUSTRE_VERSION >= OBD_OCD_VERSION(2,6,50,0)
-                        if (strcmp(argv[optind - 1], "--count") == 0)
-                                fprintf(stderr, "warning: '--count' deprecated,"
-                                        " use '--stripe-count' instead\n");
+                       if (strcmp(argv[optind - 1], "--count") == 0)
+                               fprintf(stderr, "warning: '--count' deprecated,"
+                                       " use '--stripe-count' instead\n");
 #endif
-                        if (!(param.verbose & VERBOSE_DETAIL)) {
-                                param.verbose |= VERBOSE_COUNT;
-                                param.maxdepth = 0;
-                        }
-                        break;
-                case 's':
+                       if (!(param->verbose & VERBOSE_DETAIL)) {
+                               param->verbose |= VERBOSE_COUNT;
+                               param->maxdepth = 0;
+                       }
+                       break;
+               case 's':
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --size option"
 #elif LUSTRE_VERSION >= OBD_OCD_VERSION(2,6,50,0)
-                        fprintf(stderr, "warning: '--size|-s' deprecated, "
-                                "use '--stripe-size|-S' instead\n");
+                       fprintf(stderr, "warning: '--size|-s' deprecated, "
+                               "use '--stripe-size|-S' instead\n");
 #endif
-                case 'S':
-                        if (!(param.verbose & VERBOSE_DETAIL)) {
-                                param.verbose |= VERBOSE_SIZE;
-                                param.maxdepth = 0;
-                        }
-                        break;
-                case 'o':
+               case 'S':
+                       if (!(param->verbose & VERBOSE_DETAIL)) {
+                               param->verbose |= VERBOSE_SIZE;
+                               param->maxdepth = 0;
+                       }
+                       break;
+               case 'o':
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,4,50,0)
-                        fprintf(stderr, "warning: '--offset|-o' deprecated, "
-                                "use '--stripe-index|-i' instead\n");
+                       fprintf(stderr, "warning: '--offset|-o' deprecated, "
+                               "use '--stripe-index|-i' instead\n");
 #else
-                        if (strcmp(argv[optind - 1], "--offset") == 0)
-                                /* need --stripe-index established first */
-                                fprintf(stderr, "warning: '--offset' deprecated"
-                                        ", use '--index' instead\n");
+                       if (strcmp(argv[optind - 1], "--offset") == 0)
+                               /* need --stripe-index established first */
+                               fprintf(stderr, "warning: '--offset' deprecated"
+                                       ", use '--index' instead\n");
 #endif
-                case 'i':
+               case 'i':
 #if LUSTRE_VERSION >= OBD_OCD_VERSION(2,9,50,0)
 #warning "remove deprecated --offset and --index options"
 #elif LUSTRE_VERSION >= OBD_OCD_VERSION(2,6,50,0)
-                        if (strcmp(argv[optind - 1], "--index") == 0)
-                                fprintf(stderr, "warning: '--index' deprecated"
-                                        ", use '--stripe-index' instead\n");
+                       if (strcmp(argv[optind - 1], "--index") == 0)
+                               fprintf(stderr, "warning: '--index' deprecated"
+                                       ", use '--stripe-index' instead\n");
 #endif
-                        if (!(param.verbose & VERBOSE_DETAIL)) {
-                                param.verbose |= VERBOSE_OFFSET;
-                                param.maxdepth = 0;
-                        }
-                        break;
-                case 'p':
-                        if (!(param.verbose & VERBOSE_DETAIL)) {
-                                param.verbose |= VERBOSE_POOL;
-                                param.maxdepth = 0;
-                        }
-                        break;
-                case 'g':
-                        if (!(param.verbose & VERBOSE_DETAIL)) {
-                                param.verbose |= VERBOSE_GENERATION;
-                                param.maxdepth = 0;
-                        }
-                        break;
-                case 'M':
-                        if (!(param.verbose & VERBOSE_DETAIL))
-                                param.maxdepth = 0;
-                        param.verbose |= VERBOSE_MDTINDEX;
-                        break;
-                case 'R':
-                        param.raw = 1;
-                        break;
-                default:
-                        return CMD_HELP;
-                }
-        }
-
-        if (optind >= argc)
-                return CMD_HELP;
-
-        if (param.recursive)
-                param.maxdepth = -1;
-
-        if (!param.verbose)
-                param.verbose = VERBOSE_ALL;
-        if (param.quiet)
-                param.verbose = VERBOSE_OBJID;
-
-        do {
-                rc = llapi_getstripe(argv[optind], &param);
-        } while (++optind < argc && !rc);
-
-        if (rc)
-                fprintf(stderr, "error: %s failed for %s.\n",
-                        argv[0], argv[optind - 1]);
-        return rc;
+                       if (!(param->verbose & VERBOSE_DETAIL)) {
+                               param->verbose |= VERBOSE_OFFSET;
+                               param->maxdepth = 0;
+                       }
+                       break;
+               case 'p':
+                       if (!(param->verbose & VERBOSE_DETAIL)) {
+                               param->verbose |= VERBOSE_POOL;
+                               param->maxdepth = 0;
+                       }
+                       break;
+               case 'g':
+                       if (!(param->verbose & VERBOSE_DETAIL)) {
+                               param->verbose |= VERBOSE_GENERATION;
+                               param->maxdepth = 0;
+                       }
+                       break;
+               case 'M':
+                       if (!(param->verbose & VERBOSE_DETAIL))
+                               param->maxdepth = 0;
+                       param->verbose |= VERBOSE_MDTINDEX;
+                       break;
+               case 'R':
+                       param->raw = 1;
+                       break;
+               default:
+                       return CMD_HELP;
+               }
+       }
+
+       if (optind >= argc)
+               return CMD_HELP;
+
+       if (param->recursive)
+               param->maxdepth = -1;
+
+       if (!param->verbose)
+               param->verbose = VERBOSE_ALL;
+       if (param->quiet)
+               param->verbose = VERBOSE_OBJID;
+
+       do {
+               rc = llapi_getstripe(argv[optind], param);
+       } while (++optind < argc && !rc);
+
+       if (rc)
+               fprintf(stderr, "error: %s failed for %s.\n",
+                       argv[0], argv[optind - 1]);
+       return rc;
 }
 
 static int lfs_tgts(int argc, char **argv)
@@ -1098,6 +1409,117 @@ static int lfs_tgts(int argc, char **argv)
         return rc;
 }
 
+/* "getstripe" command: hand the arguments to lfs_getstripe_internal() with
+ * an all-zero find_param. */
+static int lfs_getstripe(int argc, char **argv)
+{
+       struct find_param param = { 0 };
+       int rc;
+
+       rc = lfs_getstripe_internal(argc, argv, &param);
+
+       return rc;
+}
+
+/* "getdirstripe" command: like lfs_getstripe(), but sets param.get_lmv
+ * before calling lfs_getstripe_internal(). */
+static int lfs_getdirstripe(int argc, char **argv)
+{
+       struct find_param param = { 0 };
+       int rc;
+
+       param.get_lmv = 1;
+       rc = lfs_getstripe_internal(argc, argv, &param);
+
+       return rc;
+}
+
+/**
+ * Create one or more directories at a given stripe offset.
+ *
+ * Parses "-i|--index <offset>" followed by one or more directory names and
+ * calls llapi_dir_create_pool() on each name in turn, stopping at the first
+ * failure.
+ *
+ * \param[in] argc  number of entries in \a argv
+ * \param[in] argv  command name, options, then directory names
+ *
+ * \retval 0         every directory was created
+ * \retval CMD_HELP  missing or malformed arguments
+ * \retval other     value returned by the failing llapi_dir_create_pool()
+ */
+static int lfs_setdirstripe(int argc, char **argv)
+{
+       char *dname;
+       int result;
+       int  st_offset, st_count;
+       long offset;
+       char *end;
+       int c;
+       char *stripe_off_arg = NULL;
+       int  flags = 0;
+
+       struct option long_opts[] = {
+               {"index",    required_argument, 0, 'i'},
+               {0, 0, 0, 0}
+       };
+
+       st_offset = -1;
+       st_count = 1;
+       optind = 0;
+       /* NOTE(review): 'o' is in the option string but has no case below,
+        * so "-o" ends up in the default branch as an unrecognized option. */
+       while ((c = getopt_long(argc, argv, "i:o",
+                               long_opts, NULL)) >= 0) {
+               switch (c) {
+               case 0:
+                       /* Long options. */
+                       break;
+               case 'i':
+                       stripe_off_arg = optarg;
+                       break;
+               default:
+                       fprintf(stderr, "error: %s: option '%s' "
+                                       "unrecognized\n",
+                                       argv[0], argv[optind - 1]);
+                       return CMD_HELP;
+               }
+       }
+
+       if (optind == argc) {
+               fprintf(stderr, "error: %s: missing dirname\n",
+                       argv[0]);
+               return CMD_HELP;
+       }
+
+       dname = argv[optind];
+       if (stripe_off_arg == NULL) {
+               fprintf(stderr, "error: %s: missing stripe_off.\n",
+                       argv[0]);
+               return CMD_HELP;
+       }
+
+       /* Get the stripe offset.  Reject an empty string, trailing garbage
+        * and values that do not fit in an int instead of silently
+        * truncating them; "-1" still parses to -1. */
+       errno = 0;
+       offset = strtol(stripe_off_arg, &end, 0);
+       st_offset = (int)offset;
+       if (errno != 0 || end == stripe_off_arg || *end != '\0' ||
+           st_offset != offset) {
+               fprintf(stderr, "error: %s: bad stripe offset '%s'\n",
+                       argv[0], stripe_off_arg);
+               return CMD_HELP;
+       }
+
+       /* The loop relies on argv[] ending with a NULL entry. */
+       do {
+               result = llapi_dir_create_pool(dname, flags, st_offset,
+                                              st_count, 0, NULL);
+               if (result) {
+                       fprintf(stderr, "error: %s: create stripe dir '%s' "
+                               "failed\n", argv[0], dname);
+                       break;
+               }
+               dname = argv[++optind];
+       } while (dname != NULL);
+
+       return result;
+}
+
+/*
+ * Call llapi_direntry_remove() on every name given on the command line,
+ * stopping at the first failure.
+ *
+ * Returns CMD_HELP when no name is given, otherwise the result of the last
+ * llapi_direntry_remove() call (0 when all of them succeeded).
+ */
+static int lfs_rmentry(int argc, char **argv)
+{
+       int rc = 0;
+       int i;
+
+       if (argc <= 1) {
+               fprintf(stderr, "error: %s: missing dirname\n",
+                       argv[0]);
+               return CMD_HELP;
+       }
+
+       /* The walk relies on argv[] ending with a NULL entry. */
+       for (i = 1; argv[i] != NULL; i++) {
+               rc = llapi_direntry_remove(argv[i]);
+               if (rc == 0)
+                       continue;
+
+               fprintf(stderr, "error: %s: remove dir entry '%s' "
+                       "failed\n", argv[0], argv[i]);
+               break;
+       }
+
+       return rc;
+}
+
 static int lfs_osts(int argc, char **argv)
 {
         return lfs_tgts(argc, argv);
@@ -2710,6 +3132,442 @@ static int lfs_data_version(int argc, char **argv)
        return rc;
 }
 
+/*
+ * Print the HSM state of every path given on the command line.
+ *
+ * For each path the raw hus_states value is printed in hex, followed by the
+ * name of every flag that is set and, when non-zero, the archive id.
+ * Processing stops at the first path whose state cannot be read.
+ *
+ * Returns 0 on success, CMD_HELP when no path is given, or the non-zero
+ * value returned by llapi_hsm_state_get().
+ */
+static int lfs_hsm_state(int argc, char **argv)
+{
+       /* Flags in display order; the last three are the user-settable
+        * ones. */
+       static const struct {
+               unsigned int     bit;
+               const char      *label;
+       } flag_names[] = {
+               { HS_RELEASED,  " released" },
+               { HS_EXISTS,    " exists" },
+               { HS_DIRTY,     " dirty" },
+               { HS_ARCHIVED,  " archived" },
+               { HS_NORELEASE, " never_release" },
+               { HS_NOARCHIVE, " never_archive" },
+               { HS_LOST,      " lost_from_hsm" },
+       };
+       struct hsm_user_state hus;
+       unsigned int f;
+       int idx;
+       int rc;
+
+       if (argc < 2)
+               return CMD_HELP;
+
+       for (idx = 1; idx < argc; idx++) {
+               rc = llapi_hsm_state_get(argv[idx], &hus);
+               if (rc) {
+                       fprintf(stderr, "can't get hsm state for %s: %s\n",
+                               argv[idx], strerror(errno = -rc));
+                       return rc;
+               }
+
+               /* Path name and raw status word first */
+               printf("%s: (0x%08x)", argv[idx], hus.hus_states);
+
+               /* Then one label per flag that is set */
+               for (f = 0; f < sizeof(flag_names) / sizeof(flag_names[0]);
+                    f++) {
+                       if (hus.hus_states & flag_names[f].bit)
+                               printf("%s", flag_names[f].label);
+               }
+
+               if (hus.hus_archive_id != 0)
+                       printf(", archive_id:%d", hus.hus_archive_id);
+               printf("\n");
+       }
+
+       return 0;
+}
+
+#define LFS_HSM_SET   0
+#define LFS_HSM_CLEAR 1
+
+/**
+ * Common implementation of hsm_set and hsm_clear.
+ *
+ * Builds a flag mask from the command-line options, then applies it to every
+ * remaining argument through llapi_hsm_state_set(), stopping at the first
+ * failure.
+ *
+ * @mode  LFS_HSM_SET passes the mask as the flags to set; any other value
+ *        (LFS_HSM_CLEAR) passes it as the flags to clear.
+ *
+ * Returns 0 on success, CMD_HELP on a usage error (fewer than three
+ * arguments, unknown option, or no flag selected), or the non-zero value
+ * returned by llapi_hsm_state_set().
+ */
+static int lfs_hsm_change_flags(int argc, char **argv, int mode)
+{
+       struct option long_opts[] = {
+               {"lost", 0, 0, 'l'},
+               {"norelease", 0, 0, 'r'},
+               {"noarchive", 0, 0, 'a'},
+               {"archived", 0, 0, 'A'},
+               {"dirty", 0, 0, 'd'},
+               {"exists", 0, 0, 'e'},
+               {0, 0, 0, 0}
+       };
+       char short_opts[] = "lraAde";
+       __u64 mask = 0;
+       __u64 to_set;
+       __u64 to_clear;
+       int opt;
+       int rc;
+
+       if (argc < 3)
+               return CMD_HELP;
+
+       optind = 0;
+       for (;;) {
+               opt = getopt_long(argc, argv, short_opts, long_opts, NULL);
+               if (opt == -1)
+                       break;
+
+               switch (opt) {
+               case 'A':
+                       mask |= HS_ARCHIVED;
+                       break;
+               case 'a':
+                       mask |= HS_NOARCHIVE;
+                       break;
+               case 'd':
+                       mask |= HS_DIRTY;
+                       break;
+               case 'e':
+                       mask |= HS_EXISTS;
+                       break;
+               case 'l':
+                       mask |= HS_LOST;
+                       break;
+               case 'r':
+                       mask |= HS_NORELEASE;
+                       break;
+               case '?':
+                       return CMD_HELP;
+               default:
+                       fprintf(stderr, "error: %s: option '%s' unrecognized\n",
+                               argv[0], argv[optind - 1]);
+                       return CMD_HELP;
+               }
+       }
+
+       /* At least one flag option is required */
+       if (mask == 0)
+               return CMD_HELP;
+
+       /* Decide once which side of llapi_hsm_state_set() gets the mask */
+       if (mode == LFS_HSM_SET) {
+               to_set = mask;
+               to_clear = 0;
+       } else {
+               to_set = 0;
+               to_clear = mask;
+       }
+
+       for (; optind < argc; optind++) {
+               rc = llapi_hsm_state_set(argv[optind], to_set, to_clear, 0);
+               if (rc != 0) {
+                       fprintf(stderr, "Can't change hsm flags for %s: %s\n",
+                               argv[optind], strerror(errno = -rc));
+                       return rc;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Print the current HSM action of every path given on the command line.
+ *
+ * Each line shows the path and the action name.  When the action is not
+ * HUA_NONE, the progress state and an extent description follow: bytes moved
+ * for a running archive/restore, otherwise the extent covered by the action.
+ * Processing stops at the first path whose action cannot be read.
+ *
+ * Returns 0 on success, CMD_HELP when no path is given, or the non-zero
+ * value returned by llapi_hsm_current_action().
+ */
+static int lfs_hsm_action(int argc, char **argv)
+{
+       struct hsm_current_action        hca;
+       const struct hsm_extent         *ext = &hca.hca_location;
+       enum hsm_user_action             act;
+       enum hsm_progress_states         state;
+       int                              idx;
+       int                              rc;
+
+       if (argc < 2)
+               return CMD_HELP;
+
+       for (idx = 1; idx < argc; idx++) {
+               rc = llapi_hsm_current_action(argv[idx], &hca);
+               if (rc) {
+                       fprintf(stderr, "can't get hsm action for %s: %s\n",
+                               argv[idx], strerror(errno = -rc));
+                       return rc;
+               }
+               act = hca.hca_action;
+               state = hca.hca_state;
+
+               printf("%s: %s", argv[idx], hsm_user_action2name(act));
+
+               /* Nothing more to show for a file without action */
+               if (hca.hca_action == HUA_NONE) {
+                       printf("\n");
+                       continue;
+               }
+
+               printf(" %s ", hsm_progress_state2name(state));
+
+               if (state == HPS_RUNNING &&
+                   (act == HUA_ARCHIVE || act == HUA_RESTORE)) {
+                       printf("("LPX64 " bytes moved)\n", ext->length);
+               } else if (ext->offset + ext->length == OBD_OBJECT_EOF) {
+                       printf("(from "LPX64 " to EOF)\n", ext->offset);
+               } else {
+                       printf("(from "LPX64 " to "LPX64")\n",
+                              ext->offset, ext->offset + ext->length);
+               }
+       }
+
+       return 0;
+}
+
+/* "hsm_set" command: lfs_hsm_change_flags() in LFS_HSM_SET mode. */
+static int lfs_hsm_set(int argc, char **argv)
+{
+       int rc = lfs_hsm_change_flags(argc, argv, LFS_HSM_SET);
+
+       return rc;
+}
+
+/* "hsm_clear" command: lfs_hsm_change_flags() in LFS_HSM_CLEAR mode. */
+static int lfs_hsm_clear(int argc, char **argv)
+{
+       int rc = lfs_hsm_change_flags(argc, argv, LFS_HSM_CLEAR);
+
+       return rc;
+}
+
+/**
+ * Check file state and return its fid, to be used by lfs_hsm_request().
+ *
+ * The file is lstat()ed, its device is compared with the one recorded in
+ * \a last_dev, and its FID is read with llapi_path2fid().  An error message
+ * is printed on stderr for every failure.
+ *
+ * \param[in]     file      Path to file to check
+ * \param[in,out] fid       Pointer to allocated lu_fid struct.
+ * \param[in,out] last_dev  Pointer to last device id used; 0 means that no
+ *                          file has been checked yet.  Updated to the device
+ *                          of \a file when the device check passes.
+ *
+ * \retval 0        on success.
+ * \retval -errno   if lstat() fails.
+ * \retval -EINVAL  if \a file is not on the device recorded in \a last_dev.
+ * \retval other    value returned by a failing llapi_path2fid().
+ */
+static int lfs_hsm_prepare_file(char *file, struct lu_fid *fid,
+                               dev_t *last_dev)
+{
+       struct stat     st;
+       int             rc;
+
+       rc = lstat(file, &st);
+       if (rc) {
+               /* Save errno first: fprintf() is allowed to overwrite it. */
+               rc = -errno;
+               fprintf(stderr, "Cannot stat %s: %s\n", file, strerror(-rc));
+               return rc;
+       }
+       /* Every file of a request must live on the same device as the
+        * previous ones; the check is skipped for the first file. */
+       if (*last_dev != st.st_dev && *last_dev != 0) {
+               fprintf(stderr, "All files should be "
+                       "on the same filesystem: %s\n", file);
+               return -EINVAL;
+       }
+       *last_dev = st.st_dev;
+
+       rc = llapi_path2fid(file, fid);
+       if (rc) {
+               fprintf(stderr, "Cannot read FID of %s: %s\n",
+                       file, strerror(-rc));
+               return rc;
+       }
+       return 0;
+}
+
+/**
+ * Build one HSM request for a set of files and send it.
+ *
+ * Files are taken from the remaining command-line arguments and, when
+ * -l|--filelist is given, from that file (one path per line).  Each file is
+ * checked with lfs_hsm_prepare_file(); the first failure aborts the whole
+ * request.  The request is sent through llapi_hsm_request() using the
+ * resolved path of one of the files.
+ *
+ * Options: -l|--filelist <file>, -D|--data <opaque string>,
+ * -a|--archive <id> (accepted only when \a action is HUA_ARCHIVE).
+ *
+ * \param[in] argc    number of entries in \a argv
+ * \param[in] argv    command name, options, then file names
+ * \param[in] action  value stored in the request's hr_action field
+ *
+ * \retval 0         the request was sent
+ * \retval CMD_HELP  usage error
+ * \retval negative  errno-style value from a failed system call, or the
+ *                   value returned by the failing helper/llapi call
+ */
+static int lfs_hsm_request(int argc, char **argv, int action)
+{
+       struct option            long_opts[] = {
+               {"filelist", 1, 0, 'l'},
+               {"data", 1, 0, 'D'},
+               {"archive", 1, 0, 'a'},
+               {0, 0, 0, 0}
+       };
+       dev_t                    last_dev = 0;
+       char                     short_opts[] = "l:D:a:";
+       struct hsm_user_request *hur, *oldhur;
+       int                      c, i;
+       size_t                   len = 0;
+       size_t                   linelen;
+       int                      nbfile;
+       char                    *line = NULL;
+       char                    *filelist = NULL;
+       char                     fullpath[PATH_MAX];
+       char                    *opaque = NULL;
+       int                      opaque_len = 0;
+       int                      archive_id = 0;
+       FILE                    *fp = NULL;
+       int                      nbfile_alloc = 0;
+       char                     some_file[PATH_MAX+1] = "";
+       int                      rc;
+
+       if (argc < 2)
+               return CMD_HELP;
+
+       optind = 0;
+       while ((c = getopt_long(argc, argv, short_opts,
+                               long_opts, NULL)) != -1) {
+               switch (c) {
+               case 'l':
+                       filelist = optarg;
+                       break;
+               case 'D':
+                       opaque = optarg;
+                       break;
+               case 'a':
+                       if (action != HUA_ARCHIVE) {
+                               fprintf(stderr,
+                                       "error: -a is supported only "
+                                       "when archiving\n");
+                               return CMD_HELP;
+                       }
+                       archive_id = atoi(optarg);
+                       break;
+               case '?':
+                       return CMD_HELP;
+               default:
+                       fprintf(stderr, "error: %s: option '%s' unrecognized\n",
+                               argv[0], argv[optind - 1]);
+                       return CMD_HELP;
+               }
+       }
+
+       /* All remaining args are files, so we have at least nbfile */
+       nbfile = argc - optind;
+
+       if ((nbfile == 0) && (filelist == NULL))
+               return CMD_HELP;
+
+       if (opaque != NULL)
+               opaque_len = strlen(opaque);
+
+       /* Alloc the request structure with enough place to store all files
+        * from command line. */
+       hur = llapi_hsm_user_request_alloc(nbfile, opaque_len);
+       if (hur == NULL) {
+               /* Save errno first: fprintf() is allowed to overwrite it. */
+               rc = -errno;
+               fprintf(stderr, "Cannot create the request: %s\n",
+                       strerror(-rc));
+               return rc;
+       }
+       nbfile_alloc = nbfile;
+
+       hur->hur_request.hr_action = action;
+       hur->hur_request.hr_archive_id = archive_id;
+       hur->hur_request.hr_flags = 0;
+
+       /* Remember one path for realpath() below.  A name that does not fit
+        * in some_file is not copied, same as for file list entries. */
+       if (nbfile != 0 && strlen(argv[optind]) < sizeof(some_file))
+               strcpy(some_file, argv[optind]);
+
+       /* All remaining args are files, add them */
+       for (i = 0; i < nbfile; i++) {
+               hur->hur_user_item[i].hui_extent.length = -1;
+               rc = lfs_hsm_prepare_file(argv[optind + i],
+                                         &hur->hur_user_item[i].hui_fid,
+                                         &last_dev);
+               hur->hur_request.hr_itemcount++;
+               if (rc)
+                       goto out_free;
+       }
+
+       /* from here stop using nb_file, use hur->hur_request.hr_itemcount */
+
+       /* If a filelist was specified, read the filelist from it. */
+       if (filelist != NULL) {
+               fp = fopen(filelist, "r");
+               if (fp == NULL) {
+                       rc = -errno;
+                       fprintf(stderr, "Cannot read the file list %s: %s\n",
+                               filelist, strerror(-rc));
+                       goto out_free;
+               }
+
+               while ((rc = getline(&line, &len, fp)) != -1) {
+                       struct hsm_user_item *hui;
+
+                       /* If allocated buffer was too small, gets something
+                        * bigger */
+                       if (nbfile_alloc <= hur->hur_request.hr_itemcount) {
+                               nbfile_alloc = nbfile_alloc * 2 + 1;
+                               oldhur = hur;
+                               hur = llapi_hsm_user_request_alloc(nbfile_alloc,
+                                                                  opaque_len);
+                               if (hur == NULL) {
+                                       rc = -errno;
+                                       fprintf(stderr, "Cannot allocate "
+                                               "the request: %s\n",
+                                               strerror(-rc));
+                                       /* keep the old request so that it is
+                                        * freed at out_free */
+                                       hur = oldhur;
+                                       goto out_free;
+                               }
+                               memcpy(hur, oldhur, hur_len(oldhur));
+                               free(oldhur);
+                       }
+
+                       /* Chop the trailing newline, if any.  The length is
+                        * checked so that a line starting with a NUL byte
+                        * cannot index before the buffer. */
+                       linelen = strlen(line);
+                       if (linelen > 0 && line[linelen - 1] == '\n')
+                               line[linelen - 1] = '\0';
+
+                       hui =
+                            &hur->hur_user_item[hur->hur_request.hr_itemcount];
+                       hui->hui_extent.length = -1;
+                       rc = lfs_hsm_prepare_file(line, &hui->hui_fid,
+                                                 &last_dev);
+                       hur->hur_request.hr_itemcount++;
+                       if (rc)
+                               goto out_free;
+
+                       if ((some_file[0] == '\0') &&
+                           (strlen(line) < sizeof(some_file)))
+                               strcpy(some_file, line);
+               }
+
+               /* Read-only stream: nothing to flush, so the result of
+                * fclose() is not needed.  Clear fp so that out_free does
+                * not close it twice. */
+               fclose(fp);
+               fp = NULL;
+       }
+
+       /* If a --data was used, add it to the request */
+       hur->hur_request.hr_data_len = opaque_len;
+       if (opaque != NULL)
+               memcpy(hur_data(hur), opaque, opaque_len);
+
+       /* Send the HSM request.  Without a resolved path fullpath is
+        * uninitialized, so give up instead of sending it. */
+       if (realpath(some_file, fullpath) == NULL) {
+               rc = -errno;
+               fprintf(stderr, "Could not find path '%s': %s\n",
+                       some_file, strerror(-rc));
+               goto out_free;
+       }
+       rc = llapi_hsm_request(fullpath, hur);
+       if (rc)
+               fprintf(stderr, "Cannot send HSM request (use of %s): %s\n",
+                       some_file, strerror(-rc));
+
+out_free:
+       /* Common exit: release whatever is still held on any path. */
+       if (fp != NULL)
+               fclose(fp);
+       free(line);
+       free(hur);
+       return rc;
+}
+
+/* "hsm_archive" command: lfs_hsm_request() with action HUA_ARCHIVE. */
+static int lfs_hsm_archive(int argc, char **argv)
+{
+       int rc = lfs_hsm_request(argc, argv, HUA_ARCHIVE);
+
+       return rc;
+}
+
+/* "hsm_restore" command: lfs_hsm_request() with action HUA_RESTORE. */
+static int lfs_hsm_restore(int argc, char **argv)
+{
+       int rc = lfs_hsm_request(argc, argv, HUA_RESTORE);
+
+       return rc;
+}
+
+/* "hsm_release" command: lfs_hsm_request() with action HUA_RELEASE. */
+static int lfs_hsm_release(int argc, char **argv)
+{
+       int rc = lfs_hsm_request(argc, argv, HUA_RELEASE);
+
+       return rc;
+}
+
+/* "hsm_remove" command: lfs_hsm_request() with action HUA_REMOVE. */
+static int lfs_hsm_remove(int argc, char **argv)
+{
+       int rc = lfs_hsm_request(argc, argv, HUA_REMOVE);
+
+       return rc;
+}
+
+/* "hsm_cancel" command: lfs_hsm_request() with action HUA_CANCEL. */
+static int lfs_hsm_cancel(int argc, char **argv)
+{
+       int rc = lfs_hsm_request(argc, argv, HUA_CANCEL);
+
+       return rc;
+}
+
+/*
+ * "swap_layouts" command: takes exactly two file names and passes them to
+ * llapi_swap_layouts() with the KEEP_MTIME and KEEP_ATIME flags set.
+ *
+ * Returns CMD_HELP on a wrong argument count, otherwise the value returned
+ * by llapi_swap_layouts().
+ */
+static int lfs_swap_layouts(int argc, char **argv)
+{
+       int rc;
+
+       if (argc == 3)
+               rc = llapi_swap_layouts(argv[1], argv[2], 0, 0,
+                                       SWAP_LAYOUTS_KEEP_MTIME |
+                                       SWAP_LAYOUTS_KEEP_ATIME);
+       else
+               rc = CMD_HELP;
+
+       return rc;
+}
+
 int main(int argc, char **argv)
 {
         int rc;