int llapi_group_lock(int fd, int gid);
int llapi_group_unlock(int fd, int gid);
+bool llapi_file_is_sparse(int fd);
+off_t llapi_data_seek(int src_fd, off_t offset, size_t *length);
+
/* Ladvise */
int llapi_ladvise(int fd, unsigned long long flags, int num_advise,
struct llapi_lu_ladvise *ladvise);
void *buf, size_t count, off_t pos);
ssize_t llapi_mirror_copy_many(int fd, __u16 src, __u16 *dst, size_t count);
int llapi_mirror_copy(int fd, unsigned int src, unsigned int dst,
- off_t pos, size_t count);
+ off_t pos, size_t count);
+off_t llapi_mirror_data_seek(int fd, unsigned int id, off_t pos, size_t *size);
int llapi_heat_get(int fd, struct lu_heat *heat);
int llapi_heat_set(int fd, __u64 flags);
+
int llapi_layout_sanity(struct llapi_layout *layout, bool incomplete, bool flr);
void llapi_layout_sanity_perror(int error);
int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size);
}
}
-loff_t ll_lseek(struct inode *inode, loff_t offset, int whence)
+loff_t ll_lseek(struct file *file, loff_t offset, int whence)
{
+ struct inode *inode = file_inode(file);
struct lu_env *env;
struct cl_io *io;
struct cl_lseek_io *lsio;
io = vvp_env_thread_io(env);
io->ci_obj = ll_i2info(inode)->lli_clob;
+ ll_io_set_mirror(io, file);
lsio = &io->u.ci_lseek;
lsio->ls_start = offset;
do {
rc = cl_io_init(env, io, CIT_LSEEK, io->ci_obj);
- if (!rc)
+ if (!rc) {
+ struct vvp_io *vio = vvp_env_io(env);
+
+ vio->vui_fd = file->private_data;
rc = cl_io_loop(env, io);
- else
+ } else {
rc = io->ci_result;
+ }
retval = rc ? : lsio->ls_result;
cl_io_fini(env, io);
} while (unlikely(io->ci_need_restart));
cl_sync_file_range(inode, offset, OBD_OBJECT_EOF,
CL_FSYNC_LOCAL, 0);
- retval = ll_lseek(inode, offset, origin);
+ retval = ll_lseek(file, offset, origin);
if (retval < 0)
return retval;
retval = vfs_setpos(file, retval, ll_file_maxbytes(inode));
}
run_test 49a "FIEMAP upon FLR file"
-test_50() { # EX-2179
+test_50A() { # EX-2179
mkdir -p $DIR/$tdir
local file=$DIR/$tdir/$tfile
$LFS getstripe -v $file || error "getstripe $file failed"
}
-run_test 50 "mirror split update layout generation"
+run_test 50A "mirror split update layout generation"
+
+# Verify that "lfs mirror extend" and "lfs mirror copy" preserve holes of a
+# sparse source file: offsets reported by lseek_test, mirror checksums and
+# the final block count are checked.
+test_50a() {
+	$LCTL get_param osc.*.import | grep -q 'connect_flags:.*seek' ||
+		skip "OST does not support SEEK_HOLE"
+
+	local file=$DIR/$tdir/$tfile
+	local offset
+	# names must match the variables assigned below, otherwise the
+	# checksums leak into the global scope of the test script
+	local sum_1
+	local sum_2
+	local blocks
+
+	mkdir -p $DIR/$tdir
+
+	echo " ** create striped file $file"
+	$LFS setstripe -E 1M -c1 -S 1M -E eof -c2 -S1M $file ||
+		error "cannot create file with PFL layout"
+	echo " ** write 1st data chunk at 1M boundary"
+	dd if=/dev/urandom of=$file bs=1k count=20 seek=1021 ||
+		error "cannot write data at 1M boundary"
+	echo " ** write 2nd data chunk at 2M boundary"
+	dd if=/dev/urandom of=$file bs=1k count=20 seek=2041 ||
+		error "cannot write data at 2M boundary"
+	echo " ** create hole at the file end"
+	$TRUNCATE $file 3700000 || error "truncate fails"
+
+	echo " ** verify sparseness"
+	offset=$(lseek_test -d 1000 $file)
+	echo "    first data offset: $offset"
+	[[ $offset == 1000 ]] &&
+		error "src: data is not expected at offset $offset"
+	offset=$(lseek_test -l 3500000 $file)
+	echo "    hole at the end: $offset"
+	[[ $offset == 3500000 ]] ||
+		error "src: hole is expected at offset $offset"
+
+	echo " ** extend the file with new mirror"
+	# migrate_copy_data() is used
+	$LFS mirror extend -N -E 2M -S 1M -E 1G -S 2M -E eof $file ||
+		error "cannot create mirror"
+	$LFS getstripe $file | grep lcme_flags | grep stale > /dev/null &&
+		error "$file still has stale component"
+
+	# check migrate_copy_data() was correct
+	sum_1=$($LFS mirror read -N 1 $file | md5sum)
+	sum_2=$($LFS mirror read -N 2 $file | md5sum)
+	[[ $sum_1 == $sum_2 ]] ||
+		error "data mismatch: \'$sum_1\' vs. \'$sum_2\'"
+
+	# stale first mirror
+	$LFS setstripe --comp-set -I0x10001 --comp-flags=stale $file
+	$LFS setstripe --comp-set -I0x10002 --comp-flags=stale $file
+
+	echo " ** verify mirror #2 sparseness"
+	offset=$(lseek_test -d 1000 $file)
+	echo "    first data offset: $offset"
+	[[ $offset == 1000 ]] &&
+		error "dst: data is not expected at offset $offset"
+	offset=$(lseek_test -l 3500000 $file)
+	echo "    hole at the end: $offset"
+	[[ $offset == 3500000 ]] ||
+		error "dst: hole is expected at offset $offset"
+
+	echo " ** copy mirror #2 to mirror #1"
+	$LFS mirror copy -i 2 -o 1 $file || error "mirror copy fails"
+	$LFS getstripe $file | grep lcme_flags | grep stale > /dev/null &&
+		error "$file still has stale component"
+
+	# check llapi_mirror_copy_many correctness
+	sum_1=$($LFS mirror read -N 1 $file | md5sum)
+	sum_2=$($LFS mirror read -N 2 $file | md5sum)
+	[[ $sum_1 == $sum_2 ]] ||
+		error "data mismatch: \'$sum_1\' vs. \'$sum_2\'"
+
+	# stale 1st component of mirror #2 before lseek call
+	$LFS setstripe --comp-set -I0x20001 --comp-flags=stale $file
+
+	echo " ** verify mirror #1 sparseness again"
+	offset=$(lseek_test -d 1000 $file)
+	echo "    first data offset: $offset"
+	[[ $offset == 1000 ]] &&
+		error "dst: data is not expected at offset $offset"
+	offset=$(lseek_test -l 3500000 $file)
+	echo "    hole at the end: $offset"
+	[[ $offset == 3500000 ]] ||
+		error "dst: hole is expected at offset $offset"
+
+	cancel_lru_locks osc
+
+	blocks=$(stat -c%b $file)
+	echo " ** final consumed blocks: $blocks"
+	# for 3.5Mb file consumes ~6000 blocks, use 1000 to check
+	# that file is still sparse
+	(( blocks < 1000 )) ||
+		error "Mirrored file consumes $blocks blocks"
+
+	rm $file
+}
+run_test 50a "mirror extend/copy preserves sparseness"
ctrl_file=$(mktemp /tmp/CTRL.XXXXXX)
lock_file=$(mktemp /var/lock/FLR.XXXXXX)
liblustreapi_kernelconn.c liblustreapi_param.c \
liblustreapi_mirror.c liblustreapi_fid.c \
liblustreapi_ladvise.c liblustreapi_chlg.c \
- liblustreapi_heat.c liblustreapi_pcc.c
+ liblustreapi_heat.c liblustreapi_pcc.c \
+ liblustreapi_lseek.c
liblustreapi_la_LDFLAGS = $(LIBREADLINE) -version-info 1:0:0 \
-Wl,--version-script=liblustreapi.map
liblustreapi_la_LIBADD = $(top_builddir)/libcfs/libcfs/libcfs.la
static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int))
{
struct llapi_layout *layout;
- size_t buf_size = 4 * 1024 * 1024;
- void *buf = NULL;
- ssize_t rsize = -1;
- ssize_t wsize = 0;
- size_t rpos = 0;
- size_t wpos = 0;
- off_t bufoff = 0;
- int rc;
+ size_t buf_size = 4 * 1024 * 1024;
+ void *buf = NULL;
+ off_t pos = 0;
+ off_t data_end = 0;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+ bool sparse;
+ int rc;
layout = llapi_layout_get_by_fd(fd_src, 0);
if (layout) {
}
/* Use a page-aligned buffer for direct I/O */
- rc = posix_memalign(&buf, getpagesize(), buf_size);
+ rc = posix_memalign(&buf, page_size, buf_size);
if (rc != 0)
return -rc;
+ sparse = llapi_file_is_sparse(fd_src);
+ if (sparse) {
+ rc = ftruncate(fd_dst, pos);
+ if (rc < 0) {
+ rc = -errno;
+ return rc;
+ }
+ }
+
while (1) {
- /*
- * read new data only if we have written all
- * previously read data
- */
- if (wpos == rpos) {
- if (check_file) {
- rc = check_file(fd_src);
+ off_t data_off;
+ size_t to_read, to_write;
+ ssize_t rsize;
+
+ if (sparse && pos >= data_end) {
+ size_t data_size;
+
+ data_off = llapi_data_seek(fd_src, pos, &data_size);
+ if (data_off < 0) {
+ /* Non-fatal, switch to full copy */
+ sparse = false;
+ continue;
+ }
+ /* hole at the end of file, truncate up to it */
+ if (!data_size) {
+ rc = ftruncate(fd_dst, data_off);
if (rc < 0)
goto out;
}
+ pos = data_off & ~(page_size - 1);
+ data_end = data_off + data_size;
+ to_read = ((data_end - pos - 1) | (page_size - 1)) + 1;
+ to_read = MIN(to_read, buf_size);
+ } else {
+ to_read = buf_size;
+ }
- rsize = read(fd_src, buf, buf_size);
- if (rsize < 0) {
- rc = -errno;
+ if (check_file) {
+ rc = check_file(fd_src);
+ if (rc < 0)
goto out;
- }
-
- rpos += rsize;
- bufoff = 0;
}
- /* eof ? */
+ rsize = pread(fd_src, buf, to_read, pos);
+ if (rsize < 0) {
+ rc = -errno;
+ goto out;
+ }
+ /* EOF */
if (rsize == 0)
break;
- wsize = write(fd_dst, buf + bufoff, rpos - wpos);
- if (wsize < 0) {
- rc = -errno;
- break;
+ to_write = rsize;
+ while (to_write > 0) {
+ ssize_t written;
+
+ written = pwrite(fd_dst, buf, to_write, pos);
+ if (written < 0) {
+ rc = -errno;
+ goto out;
+ }
+ pos += written;
+ to_write -= written;
}
- wpos += wsize;
- bufoff += wsize;
+ if (rc || rsize < to_read)
+ break;
}
rc = fsync(fd_dst);
--- /dev/null
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * (C) Copyright (c) 2020, DataDirect Networks Inc, all rights reserved.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the GNU Lesser General Public License
+ * LGPL version 2.1 or (at your discretion) any later version.
+ * LGPL version 2.1 accompanies this distribution, and is available at
+ * http://www.gnu.org/licenses/lgpl-2.1.html
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * LGPL HEADER END
+ */
+/*
+ * lustre/utils/liblustreapi_lseek.c
+ *
+ * lustreapi library for lseek-related functionality
+ *
+ * Author: Mikhail Pershin <mpershin@whamcloud.com>
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include <lustre/lustreapi.h>
+#include "lustreapi_internal.h"
+
+/**
+ * Tell whether a file contains a hole before its end.
+ *
+ * The end-of-file offset and the offset of the first hole are both queried
+ * with lseek(); the file is reported as sparse when the first hole lies
+ * strictly before the end of file. Both lseek() calls move the file
+ * position of \a fd and it is not restored here.
+ *
+ * \param fd	file descriptor
+ *
+ * \retval	boolean, true if file has a hole, false otherwise (also
+ *		when the file is empty or either lseek() call fails)
+ */
+bool llapi_file_is_sparse(int fd)
+{
+	off_t eof_pos = lseek(fd, 0, SEEK_END);
+	off_t first_hole = lseek(fd, 0, SEEK_HOLE);
+
+	/* Errors are ignored and file is just reported as non-sparse */
+	if (eof_pos <= 0 || first_hole < 0)
+		return false;
+
+	return first_hole < eof_pos;
+}
+
+/**
+ * Get the first data segment at or after the given offset.
+ *
+ * The file position of \a src_fd is moved by the lseek() calls made here
+ * and is not restored.
+ *
+ * \param src_fd	source file descriptor
+ * \param offset	offset to start from
+ * \param length	set to the length of the data segment found, or to
+ *			zero when there is no more data before end of file
+ *
+ * \retval	offset of the next data segment, with its size stored in
+ *		\p length; when no data is left, the end-of-file offset is
+ *		returned and \p length is set to zero.
+ * \retval	-ENXIO if \p offset is beyond the end of file.
+ * \retval	-errno on failure.
+ */
+off_t llapi_data_seek(int src_fd, off_t offset, size_t *length)
+{
+	off_t data_off, hole_off;
+	int rc;
+
+	if (offset < 0) {
+		rc = -EINVAL;
+		llapi_error(LLAPI_MSG_ERROR, rc, "wrong offset: %jd",
+			    offset);
+		return rc;
+	}
+
+	data_off = lseek(src_fd, offset, SEEK_DATA);
+	if (data_off < 0) {
+		if (errno != ENXIO) {
+			rc = -errno;
+			llapi_error(LLAPI_MSG_ERROR, rc,
+				    "failed SEEK_DATA from %jd",
+				    offset);
+			return rc;
+		}
+		hole_off = lseek(src_fd, 0, SEEK_END);
+		if (hole_off < 0) {
+			rc = -errno;
+			llapi_error(LLAPI_MSG_ERROR, rc,
+				    "failed SEEK_END");
+			return rc;
+		}
+		/* data_off is negative on this path, so the range check
+		 * has to be done against the requested offset
+		 */
+		if (offset > hole_off) /* out of file range */
+			return -ENXIO;
+		/* no more data in src file, return end of file and zero size
+		 * so caller will know there must be hole up to that offset
+		 */
+		*length = 0;
+		return hole_off;
+	}
+
+	hole_off = lseek(src_fd, data_off, SEEK_HOLE);
+	if (hole_off < 0) {
+		rc = -errno;
+		llapi_error(LLAPI_MSG_ERROR, rc,
+			    "failed SEEK_HOLE from %jd", data_off);
+		return rc;
+	}
+	*length = hole_off - data_off;
+	return data_off;
+}
return rc;
}
+/**
+ * Check whether the mirror with @id has a hole.
+ *
+ * The mirror is selected with llapi_mirror_set() for the duration of the
+ * check and cleared again afterwards.
+ *
+ * \param fd	file descriptor
+ * \param id	mirror id to be checked
+ *
+ * \result	true if llapi_file_is_sparse() reports a hole for the mirror,
+ *		false otherwise or when the mirror cannot be selected
+ */
+bool llapi_mirror_is_sparse(int fd, unsigned int id)
+{
+	bool has_hole = false;
+
+	if (llapi_mirror_set(fd, id) >= 0) {
+		has_hole = llapi_file_is_sparse(fd);
+		(void) llapi_mirror_clear(fd);
+	}
+
+	return has_hole;
+}
+
+/**
+ * Seek data in a specified mirror with @id. This function looks for the
+ * first data segment starting from the given offset and returns its offset
+ * and length. The mirror is selected with llapi_mirror_set() for the
+ * duration of the call and cleared again afterwards.
+ *
+ * \param fd	file descriptor
+ *		(NOTE(review): the previous comment required O_DIRECT here;
+ *		confirm whether that is needed for seeking)
+ * \param id	mirror id to seek in
+ * \param pos	position to start the data seek from
+ * \param size	size of data segment found
+ *
+ * \result >= 0	Offset returned by llapi_data_seek() for this mirror
+ * \result < 0	Error from llapi_mirror_set() or llapi_data_seek()
+ */
+off_t llapi_mirror_data_seek(int fd, unsigned int id, off_t pos, size_t *size)
+{
+	off_t found;
+	int err = llapi_mirror_set(fd, id);
+
+	if (err < 0)
+		return err;
+
+	found = llapi_data_seek(fd, pos, size);
+	(void) llapi_mirror_clear(fd);
+
+	return found;
+}
+
/**
* Copy data contents from source mirror @src to multiple destinations
* pointed by @dst. The destination array @dst will be altered to store
{
const size_t buflen = 4 * 1024 * 1024; /* 4M */
void *buf;
- loff_t pos = 0;
+ off_t pos = 0;
+ off_t data_end = 0;
size_t page_size = sysconf(_SC_PAGESIZE);
ssize_t result = 0;
bool eof = false;
+ bool sparse;
int nr;
int i;
int rc;
if (rc) /* error code is returned directly */
return -rc;
+ sparse = llapi_mirror_is_sparse(fd, src);
+
nr = count;
+ if (sparse) {
+ /* for sparse src we have to be sure that dst has no
+ * data in src holes, so truncate it first
+ */
+ for (i = 0; i < nr; i++) {
+ rc = llapi_mirror_truncate(fd, dst[i], pos);
+ if (rc < 0) {
+ result = rc;
+ /* exclude the failed one */
+ dst[i] = dst[--nr];
+ i--;
+ continue;
+ }
+ }
+ if (!nr)
+ return result;
+ }
+
while (!eof) {
+ off_t data_off;
ssize_t bytes_read;
- size_t to_write;
+ size_t to_write, to_read;
+
+ if (sparse && pos >= data_end) {
+ size_t data_size;
+
+ data_off = llapi_mirror_data_seek(fd, src, pos,
+ &data_size);
+ if (data_off < 0) {
+ /* Non-fatal, switch to full copy */
+ sparse = false;
+ continue;
+ }
+ if (!data_size) {
+ /* hole at the end of file, set pos to the
+ * data_off, so truncate block at the end
+ * will set final dst size.
+ */
+ pos = data_off;
+ break;
+ }
+
+ data_end = data_off + data_size;
+ /* align by page */
+ pos = data_off & ~(page_size - 1);
+ data_end = ((data_end - 1) | (page_size - 1)) + 1;
+ to_read = MIN(data_end - pos, buflen);
+ } else {
+ to_read = buflen;
+ }
- bytes_read = llapi_mirror_read(fd, src, buf, buflen, pos);
+ bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
if (!bytes_read) { /* end of file */
break;
} else if (bytes_read < 0) {
i--;
continue;
}
-
assert(written == to_write);
}
-
pos += bytes_read;
- eof = bytes_read < buflen;
+ eof = bytes_read < to_read;
}
free(buf);