+/* migration_flags bit tested by lfs_migrate(): when set, a group lock is
+ * taken on the file while its data is copied */
+#define MIGRATION_BLOCKS 1
+
+/**
+ * Migrate the data of a file to a new layout.
+ *
+ * A volatile file is created in the directory of \a name with the requested
+ * striping, the content of \a name is copied into it, and the layouts of the
+ * two files are then swapped, provided the data version of \a name did not
+ * change since it was sampled before the copy.
+ *
+ * \param name            path of the file to migrate
+ * \param stripe_size     stripe size given to llapi_file_open_pool()
+ * \param stripe_offset   stripe offset given to llapi_file_open_pool()
+ * \param stripe_count    stripe count given to llapi_file_open_pool()
+ * \param stripe_pattern  stripe pattern given to llapi_file_open_pool()
+ * \param pool_name       pool name given to llapi_file_open_pool()
+ * \param migration_flags bitmask; when MIGRATION_BLOCKS is set a group lock
+ *                        is held on \a name while the data is copied
+ *
+ * \retval 0 if the layout swap succeeded
+ * \retval negative errno-style value on failure
+ */
+static int lfs_migrate(char *name, unsigned long long stripe_size,
+		       int stripe_offset, int stripe_count,
+		       int stripe_pattern, char *pool_name,
+		       __u64 migration_flags)
+{
+	int fd, fdv;
+	char volatile_file[PATH_MAX];
+	char parent[PATH_MAX];
+	char *ptr;
+	int rc;
+	__u64 dv1;
+	struct lov_user_md *lum = NULL;
+	int lumsz;
+	int bufsz;
+	void *buf = NULL;
+	int rsize, wsize;
+	__u64 rpos, wpos, bufoff;
+	int gid = 0, sz;
+	int have_gl = 0;
+
+	/* find the right size for the IO and allocate the buffer */
+	lumsz = lov_mds_md_size(LOV_MAX_STRIPE_COUNT, LOV_MAGIC_V3);
+	lum = malloc(lumsz);
+	if (lum == NULL) {
+		rc = -ENOMEM;
+		goto free;
+	}
+
+	rc = llapi_file_get_stripe(name, lum);
+	/* a failure here can have many causes and some of them are not real
+	 * errors (eg: no stripe); in case of a real error, a later call will
+	 * fail with better error management.
+	 * A zero stripe size is also replaced by the default: a zero-length
+	 * read() returns 0, which the copy loop would take for end of file */
+	if (rc < 0 || lum->lmm_stripe_size == 0)
+		bufsz = 1024 * 1024;
+	else
+		bufsz = lum->lmm_stripe_size;
+	/* posix_memalign() returns a positive errno value on failure */
+	rc = posix_memalign(&buf, getpagesize(), bufsz);
+	if (rc != 0) {
+		rc = -rc;
+		goto free;
+	}
+
+	if (migration_flags & MIGRATION_BLOCKS) {
+		/* generate a random id for the grouplock */
+		/* NOTE(review): nothing here prevents the id from being 0;
+		 * confirm that 0 is an acceptable group lock id */
+		fd = open("/dev/urandom", O_RDONLY);
+		if (fd == -1) {
+			rc = -errno;
+			fprintf(stderr, "cannot open /dev/urandom (%s)\n",
+				strerror(-rc));
+			goto free;
+		}
+		sz = sizeof(gid);
+		rc = read(fd, &gid, sz);
+		/* errno is only meaningful when read() failed, and must be
+		 * saved before close() can change it; a short read is
+		 * reported as an I/O error */
+		if (rc < 0)
+			rc = -errno;
+		else if (rc < sz)
+			rc = -EIO;
+		close(fd);
+		if (rc < 0) {
+			fprintf(stderr, "cannot read %d bytes from"
+				" /dev/urandom (%s)\n", sz, strerror(-rc));
+			goto free;
+		}
+	}
+
+	/* search for file directory pathname */
+	if (strlen(name) >= sizeof(parent)) {
+		rc = -ENAMETOOLONG;
+		fprintf(stderr, "cannot migrate %s (%s)\n", name,
+			strerror(-rc));
+		goto free;
+	}
+	strcpy(parent, name);
+	ptr = strrchr(parent, '/');
+	if (ptr == NULL) {
+		/* no directory component: the file is in the current
+		 * working directory */
+		if (getcwd(parent, sizeof(parent)) == NULL) {
+			rc = -errno;
+			goto free;
+		}
+	} else {
+		if (ptr == parent)
+			strcpy(parent, "/");
+		else
+			*ptr = '\0';
+	}
+	rc = snprintf(volatile_file, sizeof(volatile_file), "%s/%s::",
+		      parent, LUSTRE_VOLATILE_HDR);
+	if (rc < 0 || rc >= (int)sizeof(volatile_file)) {
+		rc = -ENAMETOOLONG;
+		fprintf(stderr, "cannot create volatile file in %s (%s)\n",
+			parent, strerror(-rc));
+		goto free;
+	}
+
+	/* create, open a volatile file, use caching (ie no directio) */
+	/* exclusive create is not needed because volatile files cannot
+	 * conflict on name by construction */
+	fdv = llapi_file_open_pool(volatile_file, O_CREAT | O_WRONLY,
+				   0644, stripe_size, stripe_offset,
+				   stripe_count, stripe_pattern, pool_name);
+	if (fdv < 0) {
+		rc = fdv;
+		fprintf(stderr, "cannot create volatile file in %s (%s)\n",
+			parent, strerror(-rc));
+		goto free;
+	}
+
+	/* open file, direct io */
+	/* even if the file is only read, WR mode is needed to allow
+	 * layout swap on fd */
+	fd = open(name, O_RDWR | O_DIRECT);
+	if (fd == -1) {
+		rc = -errno;
+		fprintf(stderr, "cannot open %s (%s)\n", name, strerror(-rc));
+		close(fdv);
+		goto free;
+	}
+
+	/* get file data version */
+	rc = llapi_get_data_version(fd, &dv1, 0);
+	if (rc != 0) {
+		fprintf(stderr, "cannot get dataversion on %s (%s)\n",
+			name, strerror(-rc));
+		goto error;
+	}
+
+	if (migration_flags & MIGRATION_BLOCKS) {
+		/* take group lock to limit concurrent access
+		 * this will no longer be needed when exclusive access is
+		 * implemented (see LU-2919) */
+		/* group lock is taken after data version read because it
+		 * blocks data version call */
+		if (ioctl(fd, LL_IOC_GROUP_LOCK, gid) == -1) {
+			rc = -errno;
+			fprintf(stderr, "cannot get group lock on %s (%s)\n",
+				name, strerror(-rc));
+			goto error;
+		}
+		have_gl = 1;
+	}
+
+	/* copy data */
+	rpos = 0;
+	wpos = 0;
+	bufoff = 0;
+	rsize = -1;
+	do {
+		/* read new data only if we have written all
+		 * previously read data */
+		if (wpos == rpos) {
+			rsize = read(fd, buf, bufsz);
+			if (rsize < 0) {
+				rc = -errno;
+				fprintf(stderr, "read failed on %s"
+					" (%s)\n", name,
+					strerror(-rc));
+				goto error;
+			}
+			rpos += rsize;
+			bufoff = 0;
+		}
+		/* eof ? */
+		if (rsize == 0)
+			break;
+		/* a short write leaves wpos < rpos, so the remainder is
+		 * written on the next iteration without a new read */
+		wsize = write(fdv, (char *)buf + bufoff, rpos - wpos);
+		if (wsize < 0) {
+			rc = -errno;
+			fprintf(stderr, "write failed on volatile"
+				" for %s (%s)\n", name, strerror(-rc));
+			goto error;
+		}
+		wpos += wsize;
+		bufoff += wsize;
+	} while (1);
+
+	/* flush data; the layouts must not be swapped if the copy could
+	 * not be flushed */
+	if (fsync(fdv) == -1) {
+		rc = -errno;
+		fprintf(stderr, "cannot flush volatile file for %s (%s)\n",
+			name, strerror(-rc));
+		goto error;
+	}
+
+	if (migration_flags & MIGRATION_BLOCKS) {
+		/* give back group lock; a failure is only reported, the
+		 * layout swap below is still attempted and sets rc */
+		if (ioctl(fd, LL_IOC_GROUP_UNLOCK, gid) == -1)
+			fprintf(stderr, "cannot put group lock on %s (%s)\n",
+				name, strerror(errno));
+		have_gl = 0;
+	}
+
+	/* swap layouts
+	 * for a migration we need to:
+	 * - check data version on file did not change
+	 * - keep file mtime
+	 * - keep file atime
+	 */
+	rc = llapi_fswap_layouts(fd, fdv, dv1, 0,
+				 SWAP_LAYOUTS_CHECK_DV1 |
+				 SWAP_LAYOUTS_KEEP_MTIME |
+				 SWAP_LAYOUTS_KEEP_ATIME);
+	if (rc == -EAGAIN) {
+		fprintf(stderr, "file dataversion for %s has changed"
+			" during copy, migration is aborted\n",
+			name);
+		goto error;
+	}
+	if (rc != 0)
+		fprintf(stderr, "cannot swap layouts between %s and "
+			"a volatile file (%s)\n",
+			name, strerror(-rc));
+
+error:
+	/* give back group lock */
+	if ((migration_flags & MIGRATION_BLOCKS) && have_gl &&
+	    (ioctl(fd, LL_IOC_GROUP_UNLOCK, gid) == -1)) {
+		/* we keep in rc the original error */
+		fprintf(stderr, "cannot put group lock on %s (%s)\n",
+			name, strerror(errno));
+	}
+
+	close(fdv);
+	close(fd);
+free:
+	/* free(NULL) is a no-op, so both pointers can be released
+	 * unconditionally */
+	free(lum);
+	free(buf);
+	return rc;
+}
+