1 // SPDX-License-Identifier: LGPL-2.1+
3 * Copyright (c) 2017, Intel Corporation.
6 * This file is part of Lustre, http://www.lustre.org/
8 * library for creating and managing File Level Redundancy (FLR) mirrors
10 * Author: Jinshan Xiong <jinshan.xiong@intel.com>
17 #include <sys/ioctl.h>
24 #include <sys/types.h>
25 #include <sys/xattr.h>
27 #include <sys/param.h>
29 #include <libcfs/util/ioctl.h>
30 #include <lustre/lustreapi.h>
31 #include <linux/lustre/lustre_ioctl.h>
34 * Set the mirror id for the open file referred to by @fd. Once the mirror
35 * is set successfully, the policy to choose mirrors will be disabled and the
36 * following I/O from this file descriptor will be led to this dedicated
38 * If @id is zero, it will clear the mirror id setting.
40 * \param fd file descriptor, must be opened with O_DIRECT
43 * \retval 0 on success.
44 * \retval -errno on failure.
46 int llapi_mirror_set(int fd, unsigned int id)
/* Ask llite to route I/O on this descriptor to mirror @id (0 clears it). */
51 rc = ioctl(fd, LL_IOC_FLR_SET_MIRROR, id);
60 /* in the current implementation, llite doesn't verify if the mirror
61 * id is valid, it has to be verified in an I/O context so the fstat()
62 * call is to verify that the mirror id is correct. */
63 rc = fstat(fd, &stbuf);
/* Undo the selection by setting mirror id 0; the result of this ioctl is
 * deliberately discarded.
 * NOTE(review): the condition guarding this reset is outside this excerpt;
 * presumably it only runs when the fstat() probe fails -- confirm. */
67 (void) ioctl(fd, LL_IOC_FLR_SET_MIRROR, 0);
74 * Clear mirror id setting.
76 * \see llapi_mirror_set() for details.
78 int llapi_mirror_clear(int fd)
/* Mirror id 0 is the value llapi_mirror_set() uses to clear the selection. */
80 return llapi_mirror_set(fd, 0);
84 * Read data from the mirror specified by @id. This function does not return
85 * a partial read: either the end of file is reached, or @count bytes
86 * are read, or an error is returned.
88 * \param fd file descriptor, should be opened with O_DIRECT
89 * \param id mirror id to be read from
90 * \param buf read buffer
91 * \param count number of bytes to be read
92 * \param pos file position where the read starts
94 * \result >= 0 Number of bytes that have been read
95 * \result < 0 The last seen error
97 ssize_t llapi_mirror_read(int fd, unsigned int id, void *buf, size_t count,
104 page_size = sysconf(_SC_PAGESIZE);
/* Select mirror @id so that the pread() below is served from it. */
110 rc = llapi_mirror_set(fd, id);
117 bytes_read = pread(fd, buf, count, pos);
118 if (!bytes_read) /* end of file */
121 if (bytes_read < 0) {
/* NOTE(review): the arguments for the two %ld conversions are outside this
 * excerpt; if they are off_t or size_t values, %ld is not portable to every
 * ABI -- confirm. */
123 llapi_error(LLAPI_MSG_WARN, result,
124 "fail to pread %ld-%ld of mirror %u",
/* Accumulate the number of bytes read so far. */
129 result += bytes_read;
/* A read that is not a whole number of pages is treated as hitting EOF;
 * page_size - 1 is used as the alignment mask. */
134 if (bytes_read & (page_size - 1)) /* end of file */
/* Drop the mirror selection; a failure to clear is ignored. */
138 (void) llapi_mirror_clear(fd);
/*
 * Write data to the mirror with @id.
 *
 * \param fd    file descriptor
 * \param id    mirror id to be written to
 * \param buf   data to write; its address is tested for page alignment
 * \param count number of bytes to write
 * \param pos   file position where the write starts; tested for page
 *              alignment
 *
 * NOTE(review): the return statement is outside this excerpt; presumably the
 * accumulated byte count on success or a negative value on failure -- confirm.
 */
143 ssize_t llapi_mirror_write(int fd, unsigned int id, const void *buf,
144 size_t count, off_t pos)
150 page_size = sysconf(_SC_PAGESIZE);
/* Detect a buffer address or file offset that is not page aligned. */
154 if (((unsigned long)buf & (page_size - 1)) || pos & (page_size - 1))
/* Select mirror @id so that the pwrite() below lands on it. */
157 rc = llapi_mirror_set(fd, id);
162 ssize_t bytes_written;
/* pos is advanced by bytes_written further down, so its alignment is
 * tested again here. */
164 if (pos & (page_size - 1)) {
169 bytes_written = pwrite(fd, buf, count, pos);
170 if (bytes_written < 0) {
172 llapi_error(LLAPI_MSG_WARN, result,
173 "fail to pwrite %ld-%ld of mirror %u",
/* Account for the bytes just written and step past them. */
178 result += bytes_written;
179 pos += bytes_written;
/* NOTE(review): buf is a const void pointer; arithmetic on void pointers is
 * a GNU C extension rather than ISO C -- a cast to a char pointer would be
 * the portable form. */
180 buf += bytes_written;
181 count -= bytes_written;
/* Drop the mirror selection; a failure to clear is ignored. */
184 (void) llapi_mirror_clear(fd);
/*
 * Truncate the mirror with @id to @length bytes: select the mirror, call
 * ftruncate() on @fd, then clear the mirror selection.
 *
 * \param fd     file descriptor
 * \param id     mirror id to be truncated
 * \param length new size passed to ftruncate()
 *
 * NOTE(review): error handling after llapi_mirror_set() and the return value
 * are outside this excerpt -- confirm.
 */
189 int llapi_mirror_truncate(int fd, unsigned int id, off_t length)
193 rc = llapi_mirror_set(fd, id);
197 rc = ftruncate(fd, length);
/* NOTE(review): length is an off_t but is formatted with %ld; off_t is not
 * guaranteed to be a long on every ABI -- confirm or cast. */
200 llapi_error(LLAPI_MSG_WARN, rc,
201 "fail to ftruncate mirror %u to %ld", id, length);
/* Drop the mirror selection; a failure to clear is ignored. */
204 (void) llapi_mirror_clear(fd);
/*
 * Call llapi_hole_punch(fd, start, length) with the mirror @id selected, then
 * clear the mirror selection (the result of the clear is ignored).
 *
 * \param fd     file descriptor
 * \param id     mirror id to operate on
 * \param start  offset passed through to llapi_hole_punch()
 * \param length length passed through to llapi_hole_punch()
 *
 * NOTE(review): handling of a failed llapi_mirror_set() and the return value
 * are outside this excerpt; presumably rc is returned -- confirm.
 */
209 int llapi_mirror_punch(int fd, unsigned int id, off_t start, size_t length)
213 rc = llapi_mirror_set(fd, id);
217 rc = llapi_hole_punch(fd, start, length);
218 (void) llapi_mirror_clear(fd);
/*
 * Evaluate llapi_file_is_sparse(fd) with the mirror @id selected, then clear
 * the mirror selection (the result of the clear is ignored).
 *
 * \param fd file descriptor
 * \param id mirror id to be checked
 *
 * NOTE(review): what is returned when llapi_mirror_set() fails is outside
 * this excerpt -- confirm.
 */
223 bool llapi_mirror_is_sparse(int fd, unsigned int id)
228 rc = llapi_mirror_set(fd, id);
232 sparse = llapi_file_is_sparse(fd);
233 (void) llapi_mirror_clear(fd);
239 * Seek data in a specified mirror with @id. This function looks for the
240 * first data segment from given offset and returns its offset and length
242 * \param fd file descriptor, should be opened with O_DIRECT
243 * \param id mirror id to be read from
244 * \param pos position to start the data seek from
245 * \param size size of data segment found
247 * \result >= 0 Offset of the data segment found
248 * \result < 0 The last seen error
250 off_t llapi_mirror_data_seek(int fd, unsigned int id, off_t pos, size_t *size)
/* Select mirror @id so that the seek below operates on it. */
255 rc = llapi_mirror_set(fd, id);
/* llapi_data_seek() is handed @pos and @size unchanged; its result is kept
 * in data_off. */
259 data_off = llapi_data_seek(fd, pos, size);
/* Drop the mirror selection; a failure to clear is ignored. */
260 (void) llapi_mirror_clear(fd);
266 * Copy data contents from source mirror @src to multiple destinations
267 * pointed by @dst. The destination array @dst will be altered to store
268 * successfully copied mirrors.
270 * \param fd file descriptor, should be opened with O_DIRECT
271 * \param src source mirror id, usually a valid mirror
272 * \param dst an array of destination mirror ids
273 * \param count number of elements in array @dst
275 * \result > 0 Number of mirrors successfully copied
276 * \result < 0 The last seen error
278 ssize_t llapi_mirror_copy_many(int fd, __u16 src, __u16 *dst, size_t count)
280 const size_t buflen = 4 * 1024 * 1024; /* 4M */
295 page_size = sysconf(_SC_PAGESIZE);
/* The copy buffer is page aligned; reads and writes below go through
 * llapi_mirror_read() and llapi_mirror_write(). */
301 rc = posix_memalign(&buf, page_size, buflen);
302 if (rc) /* error code is returned directly */
/* Remember whether the source mirror is sparse; the flag gates the
 * data-seek path further down. */
305 sparse = llapi_mirror_is_sparse(fd, src);
309 /* for sparse src we have to be sure that dst has no
310 * data in src holes, so truncate it first
312 for (i = 0; i < nr; i++) {
313 rc = llapi_mirror_truncate(fd, dst[i], pos);
316 /* exclude the failed one */
331 size_t to_write, to_read;
/* Once pos has reached the end of the current data segment, look up the
 * next segment of the sparse source. */
333 if (sparse && pos >= data_end) {
336 data_off = llapi_mirror_data_seek(fd, src, pos,
339 /* Non-fatal, switch to full copy */
344 /* hole at the end of file, set pos to the
345 * data_off, so truncate block at the end
346 * will set final dst size.
352 data_end = data_off + data_size;
354 pos = data_off & ~(page_size - 1);
355 data_end = ((data_end - 1) | (page_size - 1)) + 1;
356 to_read = MIN(data_end - pos, buflen);
361 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
362 if (!bytes_read) { /* end of file */
364 } else if (bytes_read < 0) {
367 llapi_error(LLAPI_MSG_ERROR, result,
368 "error reading bytes %ld-%ld of mirror %u",
373 /* round up to page align to make direct IO happy.
374 * this implies the last segment to write. */
375 to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
377 for (i = 0; i < nr; i++) {
380 written = llapi_mirror_write(fd, dst[i], buf,
385 /* this mirror is not written successfully,
386 * get rid of it from the array */
/* NOTE(review): to_write is a size_t; the declaration of written is outside
 * this excerpt. assert() disappears when NDEBUG is defined, so it should not
 * be the only guard against a short write -- confirm. */
391 assert(written == to_write);
/* A short read marks the last chunk of the source. */
394 eof = bytes_read < to_read;
/* Truncate each remaining destination mirror to the final position. */
400 for (i = 0; i < nr; i++) {
401 rc = llapi_mirror_truncate(fd, dst[i], pos);
405 /* exclude the failed one */
/* Number of destination mirrors left in the array, or the last error when
 * none is left. */
413 return nr > 0 ? nr : result;
417 * Copy data contents from source mirror @src to target mirror @dst.
419 * \param fd file descriptor, should be opened with O_DIRECT
420 * \param src source mirror id, usually a valid mirror
421 * \param dst mirror id of copy destination
422 * \param pos start file pos
423 * \param count number of bytes to be copied
425 * \result > 0 Number of bytes successfully copied
426 * \result < 0 The last seen error
428 int llapi_mirror_copy(int fd, unsigned int src, unsigned int dst, off_t pos,
431 const size_t buflen = 4 * 1024 * 1024; /* 4M */
440 page_size = sysconf(_SC_PAGESIZE);
444 if (pos & (page_size - 1) || !dst)
447 if (count != OBD_OBJECT_EOF && count & (page_size - 1))
450 rc = posix_memalign(&buf, page_size, buflen);
451 if (rc) /* error code is returned directly */
454 while (result < count) {
455 ssize_t bytes_read, bytes_written;
456 size_t to_read, to_write;
458 to_read = MIN(buflen, count - result);
460 bytes_read = pread(fd, buf, to_read, pos);
462 bytes_read = llapi_mirror_read(fd, src, buf, to_read,
464 if (!bytes_read) { /* end of file */
466 } else if (bytes_read < 0) {
468 llapi_error(LLAPI_MSG_ERROR, result,
469 "error reading bytes %ld-%ld of mirror %u",
474 /* round up to page align to make direct IO happy.
475 * this implies the last segment to write. */
476 to_write = (bytes_read + page_size - 1) & ~(page_size - 1);
478 bytes_written = llapi_mirror_write(fd, dst, buf, to_write,
480 if (bytes_written < 0) {
481 result = bytes_written;
482 llapi_error(LLAPI_MSG_ERROR, result,
483 "error writing bytes %ld-%ld of mirror %u",
488 assert(bytes_written == to_write);
491 result += bytes_read;
493 if (bytes_read < to_read) /* short read occurred */
500 rc = llapi_mirror_truncate(fd, dst, pos);
502 llapi_error(LLAPI_MSG_ERROR, result,
503 "error truncating mirror %u to %ld",