4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the GNU Lesser General Public License
8 * (LGPL) version 2.1 or (at your discretion) any later version.
9 * (LGPL) version 2.1 accompanies this distribution, and is available at
10 * http://www.gnu.org/licenses/lgpl-2.1.html
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
20 * lustre/utils/liblustreapi_layout.c
22 * lustreapi library for layout calls for interacting with the layout of
23 * Lustre files while hiding details of the internal data structures
26 * Copyright (c) 2016, 2017, Intel Corporation.
28 * Author: Ned Bass <bass6@llnl.gov>
38 #include <sys/xattr.h>
39 #include <sys/param.h>
41 #include <libcfs/util/list.h>
42 #include <lustre/lustreapi.h>
43 #include "lustreapi_internal.h"
46 * Layout component, which contains all attributes of a plain
/* One layout component. __llapi_comp_alloc() initialises the striping
 * attributes below to LLAPI_LAYOUT_DEFAULT, which the rest of this file
 * treats as "not specified". */
49 struct llapi_layout_comp {
51 uint64_t llc_stripe_size;
52 uint64_t llc_stripe_count;
53 uint64_t llc_stripe_offset;
54 /* Add 1 so user always gets back a null terminated string. */
55 char llc_pool_name[LOV_MAXPOOLNAME + 1];
56 /** Number of objects in llc_objects array if it was initialized. */
57 uint32_t llc_objects_count;
/* Per-stripe OST data; sized and resized by __llapi_comp_objects_realloc(). */
58 struct lov_user_ost_data_v1 *llc_objects;
59 /* fields used only for composite layouts */
60 struct lu_extent llc_extent; /* [start, end) of component */
61 uint32_t llc_id; /* unique ID of component */
62 uint32_t llc_flags; /* LCME_FL_* flags */
63 uint64_t llc_timestamp; /* snapshot timestamp */
64 struct list_head llc_list; /* linked to the llapi_layout
69 * An Opaque data type abstracting the layout of a Lustre file.
72 uint32_t llot_magic; /* LLAPI_LAYOUT_MAGIC */
75 bool llot_is_composite;
76 uint16_t llot_mirror_count;
77 /* Cursor pointing to one of the components in llot_comp_list */
78 struct llapi_layout_comp *llot_cur_comp;
79 struct list_head llot_comp_list;
83 * Compute the number of elements in the lmm_objects array of \a lum
84 * with size \a lum_size.
86 * \param[in] lum the struct lov_user_md to check
87 * \param[in] lum_size the number of bytes in \a lum
89 * \retval number of elements in array lum->lmm_objects
/* Derive the lmm_objects[] element count purely from the byte size of the
 * buffer: whatever follows the fixed header is counted as objects. */
91 static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
/* Buffer cannot even hold an object-less V1 header.
 * NOTE(review): the statement taken on this branch is not visible in this
 * listing -- presumably "return 0"; confirm. */
96 if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
/* The buffer may still be in the opposite byte order; normalise the magic
 * so the header size is looked up for the right layout version. */
99 if (lum->lmm_magic == __swab32(LOV_MAGIC_V1) ||
100 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
101 magic = __swab32(lum->lmm_magic);
103 magic = lum->lmm_magic;
/* Size of a header of this version with zero objects. */
105 base_size = lov_user_md_size(0, magic);
107 if (lum_size <= base_size)
/* Integer division: a trailing partial object is not counted. */
110 return (lum_size - base_size) / sizeof(lum->lmm_objects[0]);
114 * Byte-swap the fields of struct lov_user_md.
116 * XXX Rather than duplicating swabbing code here, we should eventually
117 * refactor the needed functions in lustre/ptlrpc/pack_generic.c
118 * into a library that can be shared between kernel and user code.
/* In-place byte swap of a V1/V3 blob, or of a COMP_V1 header, its entry
 * descriptors and every per-entry blob. */
121 llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
123 int i, j, ent_count, obj_count;
124 struct lov_comp_md_v1 *comp_v1 = NULL;
125 struct lov_comp_md_entry_v1 *ent;
126 struct lov_user_ost_data *lod;
/* Only buffers whose magic is a byte-swapped known value are touched.
 * NOTE(review): the statement executed when none match is not visible in
 * this listing -- presumably an early return; confirm. */
128 if (lum->lmm_magic != __swab32(LOV_MAGIC_V1) &&
129 lum->lmm_magic != __swab32(LOV_MAGIC_V3) &&
130 lum->lmm_magic != __swab32(LOV_MAGIC_COMP_V1))
133 if (lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
134 comp_v1 = (struct lov_comp_md_v1 *)lum;
/* Swab the composite header first so lcm_entry_count can be read in host
 * order for the loop bound below. */
136 if (comp_v1 != NULL) {
137 __swab32s(&comp_v1->lcm_magic);
138 __swab32s(&comp_v1->lcm_size);
139 __swab32s(&comp_v1->lcm_layout_gen);
140 __swab16s(&comp_v1->lcm_flags);
141 __swab16s(&comp_v1->lcm_entry_count);
142 ent_count = comp_v1->lcm_entry_count;
/* NOTE(review): how ent_count is set for a non-composite buffer is not
 * visible in this listing -- presumably 1; confirm. */
147 for (i = 0; i < ent_count; i++) {
148 if (comp_v1 != NULL) {
/* Swab this entry's descriptor, then re-point lum/lum_size at the blob the
 * descriptor refers to. */
149 ent = &comp_v1->lcm_entries[i];
150 __swab32s(&ent->lcme_id);
151 __swab32s(&ent->lcme_flags);
152 __swab64s(&ent->lcme_timestamp);
153 __swab64s(&ent->lcme_extent.e_start);
154 __swab64s(&ent->lcme_extent.e_end);
155 __swab32s(&ent->lcme_offset);
156 __swab32s(&ent->lcme_size);
158 lum = (struct lov_user_md *)((char *)comp_v1 +
160 lum_size = ent->lcme_size;
/* Counted while lmm_magic is still swabbed; llapi_layout_objects_in_lum()
 * recognises the swabbed V1/V3 magics itself. */
162 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
164 __swab32s(&lum->lmm_magic);
165 __swab32s(&lum->lmm_pattern);
166 __swab32s(&lum->lmm_stripe_size);
167 __swab16s(&lum->lmm_stripe_count);
168 __swab16s(&lum->lmm_stripe_offset);
/* lmm_magic is in host order from here on; anything other than V1 has its
 * object array at the v3 location. */
170 if (lum->lmm_magic != LOV_MAGIC_V1) {
171 struct lov_user_md_v3 *v3;
172 v3 = (struct lov_user_md_v3 *)lum;
173 lod = v3->lmm_objects;
175 lod = lum->lmm_objects;
/* Only l_ost_idx of each object is swabbed in the visible lines.
 * NOTE(review): confirm the remaining object fields are intentionally left
 * untouched. */
178 for (j = 0; j < obj_count; j++)
179 __swab32s(&lod[j].l_ost_idx);
184 * (Re-)allocate llc_objects[] to \a num_stripes stripes.
186 * Copy over existing llc_objects[], if any, to the new llc_objects[].
188 * \param[in] comp existing layout component to be modified
189 * \param[in] new_stripes number of stripes to make room for
191 * \retval 0 if the objects are re-allocated successfully
192 * \retval -1 on error with errno set
/* Resize comp->llc_objects[] to new_stripes entries and mark every newly
 * added slot as unset. */
194 static int __llapi_comp_objects_realloc(struct llapi_layout_comp *comp,
195 unsigned int new_stripes)
197 struct lov_user_ost_data_v1 *new_objects;
200 if (new_stripes > LOV_MAX_STRIPE_COUNT) {
/* Same size, or a non-zero request that already fits in the current array.
 * NOTE(review): the statements on these two branches are not visible in
 * this listing -- presumably "return 0" (the array is not shrunk unless the
 * request is 0); confirm. */
205 if (new_stripes == comp->llc_objects_count)
208 if (new_stripes != 0 && new_stripes <= comp->llc_objects_count)
/* NOTE(review): when new_stripes == 0 this is realloc(ptr, 0), whose result
 * is implementation-defined; the check below only treats NULL as a failure
 * for a non-zero request. */
211 new_objects = realloc(comp->llc_objects,
212 sizeof(*new_objects) * new_stripes);
213 if (new_objects == NULL && new_stripes != 0) {
/* Slots beyond the old count get LLAPI_LAYOUT_IDX_MAX, the value
 * llapi_layout_to_lum() tests for when checking the object list. */
218 for (i = comp->llc_objects_count; i < new_stripes; i++)
219 new_objects[i].l_ost_idx = LLAPI_LAYOUT_IDX_MAX;
221 comp->llc_objects = new_objects;
222 comp->llc_objects_count = new_stripes;
228 * Allocate storage for a llapi_layout_comp with \a num_stripes stripes.
230 * \param[in] num_stripes number of stripes in new layout
232 * \retval valid pointer if allocation succeeds
233 * \retval NULL if allocation fails
/* Allocate a component with room for num_stripes objects and every striping
 * attribute left unspecified. */
235 static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
237 struct llapi_layout_comp *comp;
239 if (num_stripes > LOV_MAX_STRIPE_COUNT) {
/* calloc() zero-fills, so the two explicit resets below restate the state
 * __llapi_comp_objects_realloc() expects for an empty array. */
244 comp = calloc(1, sizeof(*comp));
250 comp->llc_objects = NULL;
251 comp->llc_objects_count = 0;
/* NOTE(review): cleanup on this failure branch is not visible in this
 * listing -- confirm comp is released. */
253 if (__llapi_comp_objects_realloc(comp, num_stripes) < 0) {
/* "Not specified" defaults; the extent covers the whole file. */
259 comp->llc_pattern = LLAPI_LAYOUT_DEFAULT;
260 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
261 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
262 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
263 comp->llc_pool_name[0] = '\0';
264 comp->llc_extent.e_start = 0;
265 comp->llc_extent.e_end = LUSTRE_EOF;
/* Not linked into any layout yet. */
268 INIT_LIST_HEAD(&comp->llc_list);
274 * Free memory allocated for \a comp
276 * \param[in] comp previously allocated by __llapi_comp_alloc()
/* free(NULL) is a no-op, so the NULL guard is redundant but harmless.
 * NOTE(review): the release of comp itself is not visible in this listing --
 * confirm it follows the two lines below. */
278 static void __llapi_comp_free(struct llapi_layout_comp *comp)
280 if (comp->llc_objects != NULL)
281 free(comp->llc_objects);
286 * Free memory allocated for \a layout.
288 * \param[in] layout previously allocated by llapi_layout_alloc()
/* Unlink and free every component on layout->llot_comp_list.
 * NOTE(review): NULL handling and the release of layout itself are not
 * visible in this listing; confirm. */
290 void llapi_layout_free(struct llapi_layout *layout)
292 struct llapi_layout_comp *comp, *n;
/* The _safe iterator is required because the current entry is unlinked and
 * freed inside the loop body. */
297 list_for_each_entry_safe(comp, n, &layout->llot_comp_list, llc_list) {
298 list_del_init(&comp->llc_list);
299 __llapi_comp_free(comp);
305 * Allocate and initialize a llapi_layout structure.
307 * \retval valid llapi_layout pointer on success
308 * \retval NULL if memory allocation fails
/* Allocate an empty layout: no components, non-composite, one mirror. */
310 static struct llapi_layout *__llapi_layout_alloc(void)
312 struct llapi_layout *layout;
314 layout = calloc(1, sizeof(*layout));
315 if (layout == NULL) {
/* __llapi_layout_cur_comp() rejects any layout whose magic differs. */
321 layout->llot_magic = LLAPI_LAYOUT_MAGIC;
322 layout->llot_gen = 0;
323 layout->llot_flags = 0;
324 layout->llot_is_composite = false;
325 layout->llot_mirror_count = 1;
/* No cursor until a component is added by the caller. */
326 layout->llot_cur_comp = NULL;
327 INIT_LIST_HEAD(&layout->llot_comp_list);
333 * Allocate and initialize a new plain layout.
335 * \retval valid llapi_layout pointer on success
336 * \retval NULL if memory allocation fails
/* A plain layout is an empty layout plus exactly one component that has no
 * object array (0 stripes); that component becomes the cursor.
 * NOTE(review): the failure checks after each allocation are not visible in
 * this listing; confirm. */
338 struct llapi_layout *llapi_layout_alloc(void)
340 struct llapi_layout_comp *comp;
341 struct llapi_layout *layout;
343 layout = __llapi_layout_alloc();
347 comp = __llapi_comp_alloc(0);
353 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
354 layout->llot_cur_comp = comp;
360 * Check if the given \a lum_size is large enough to hold the required
363 * \param[in] lum the struct lov_user_md to check
364 * \param[in] lum_size the number of bytes in \a lum
366 * \retval true the \a lum_size is too small
367 * \retval false the \a lum_size is large enough
/* Checks only that the fixed-size header for the detected layout version
 * fits in lum_size; object arrays and per-entry blobs are not examined. */
369 static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
/* The magic itself must be readable before it can be inspected. */
373 if (lum_size < sizeof(lum->lmm_magic))
/* Either byte order is accepted: llapi_layout_get_by_xattr() calls this
 * before it swabs the buffer. */
376 if (lum->lmm_magic == LOV_MAGIC_V1 ||
377 lum->lmm_magic == __swab32(LOV_MAGIC_V1))
378 magic = LOV_MAGIC_V1;
379 else if (lum->lmm_magic == LOV_MAGIC_V3 ||
380 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
381 magic = LOV_MAGIC_V3;
382 else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
383 lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
384 magic = LOV_MAGIC_COMP_V1;
/* NOTE(review): the branch for an unrecognised magic is not visible in this
 * listing -- presumably reports the buffer as truncated; confirm. */
388 if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
389 return lum_size < lov_user_md_size(0, magic);
391 return lum_size < sizeof(struct lov_comp_md_v1);
394 /* Verify if the objects count in lum is consistent with the
395 * stripe count in lum. It applies to regular file only. */
/* Only host-order magics are compared here; llapi_layout_get_by_xattr()
 * calls this after its byte-swap step. */
396 static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
398 struct lov_comp_md_v1 *comp_v1 = NULL;
399 int i, ent_count, obj_count;
401 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
402 comp_v1 = (struct lov_comp_md_v1 *)lum;
403 ent_count = comp_v1->lcm_entry_count;
/* NOTE(review): the body of this branch (and of any final else) is not
 * visible in this listing -- presumably ent_count = 1 for plain layouts. */
404 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
405 lum->lmm_magic == LOV_MAGIC_V3) {
411 for (i = 0; i < ent_count; i++) {
/* NOTE(review): lcme_offset and lcme_size come straight from the buffer and
 * are not bounds-checked against the buffer size in the visible lines. */
413 lum = (struct lov_user_md *)((char *)comp_v1 +
414 comp_v1->lcm_entries[i].lcme_offset);
415 lum_size = comp_v1->lcm_entries[i].lcme_size;
417 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
/* A component without LCME_FL_INIT must not carry any objects. */
420 if (!(comp_v1->lcm_entries[i].lcme_flags &
421 LCME_FL_INIT) && obj_count != 0)
/* Otherwise the object count must equal lmm_stripe_count.
 * NOTE(review): the "if" this "else" pairs with is not visible; presumably
 * this check applies to non-composite layouts only -- confirm. */
423 } else if (obj_count != lum->lmm_stripe_count) {
431 * Convert the data from a lov_user_md to a newly allocated llapi_layout.
432 * The caller is responsible for freeing the returned pointer.
434 * \param[in] lov_xattr LOV user metadata xattr to copy data from
435 * \param[in] lov_xattr_size size the lov_xattr_size passed in
436 * \param[in] flags bitwise-or'd flags to control the behavior
438 * \retval valid llapi_layout pointer on success
439 * \retval NULL if memory allocation fails
/* Builds one llapi_layout_comp per entry of the xattr (a single one for a
 * plain V1/V3 blob) and appends each to a freshly allocated layout. */
441 struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
442 ssize_t lov_xattr_size,
445 struct lov_user_md *lum = lov_xattr;
446 struct lov_comp_md_v1 *comp_v1 = NULL;
447 struct lov_comp_md_entry_v1 *ent;
448 struct lov_user_md *v1;
449 struct llapi_layout *layout = NULL;
450 struct llapi_layout_comp *comp;
451 int i, ent_count = 0, obj_count;
453 if (lov_xattr == NULL || lov_xattr_size <= 0) {
458 /* Return an error if we got back a partial layout. */
459 if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
/* Big-endian hosts only: with LLAPI_LXF_COPY the swab is applied to a
 * private copy rather than to the caller's buffer. */
464 #if __BYTE_ORDER == __BIG_ENDIAN
465 if (flags & LLAPI_LXF_COPY) {
466 lum = malloc(lov_xattr_size);
471 memcpy(lum, lov_xattr, lov_xattr_size);
475 llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
/* Consistency check is opt-in; llapi_layout_get_by_fd() requests it for
 * non-directories. */
477 if ((flags & LLAPI_LXF_CHECK) &&
478 !llapi_layout_lum_valid(lum, lov_xattr_size)) {
483 layout = __llapi_layout_alloc();
484 if (layout == NULL) {
489 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
490 comp_v1 = (struct lov_comp_md_v1 *)lum;
491 ent_count = comp_v1->lcm_entry_count;
492 layout->llot_gen = comp_v1->lcm_layout_gen;
493 layout->llot_is_composite = true;
/* lcm_mirror_count is stored as "count - 1"; llapi_layout_to_lum() writes
 * llot_mirror_count - 1 back. */
494 layout->llot_mirror_count = comp_v1->lcm_mirror_count + 1;
/* NOTE(review): llot_gen was already assigned from the same field three
 * lines above; this second assignment is redundant. */
495 layout->llot_gen = comp_v1->lcm_layout_gen;
496 layout->llot_flags = comp_v1->lcm_flags;
497 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
498 lum->lmm_magic == LOV_MAGIC_V3) {
500 layout->llot_is_composite = false;
502 if (lov_xattr_size <= 0) {
511 if (ent_count == 0) {
516 v1 = (struct lov_user_md *)lum;
517 for (i = 0; i < ent_count; i++) {
518 if (comp_v1 != NULL) {
519 ent = &comp_v1->lcm_entries[i];
520 v1 = (struct lov_user_md *)((char *)comp_v1 +
/* lov_xattr_size is reused as the size of the current entry's blob. */
522 lov_xattr_size = ent->lcme_size;
527 obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
528 comp = __llapi_comp_alloc(obj_count);
/* NOTE(review): the condition selecting between the composite fields below
 * and the [0, LUSTRE_EOF) fallback is not visible in this listing --
 * presumably "comp_v1 != NULL"; confirm. */
533 comp->llc_extent.e_start = ent->lcme_extent.e_start;
534 comp->llc_extent.e_end = ent->lcme_extent.e_end;
535 comp->llc_id = ent->lcme_id;
536 comp->llc_flags = ent->lcme_flags;
537 if (comp->llc_flags & LCME_FL_NOSYNC)
538 comp->llc_timestamp = ent->lcme_timestamp;
540 comp->llc_extent.e_start = 0;
541 comp->llc_extent.e_end = LUSTRE_EOF;
/* Translate on-disk pattern values to their LLAPI_LAYOUT_* equivalents;
 * anything else is copied through unchanged. */
546 if (v1->lmm_pattern == LOV_PATTERN_RAID0)
547 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
548 else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
549 LOV_PATTERN_OVERSTRIPING))
550 comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING;
552 /* Lustre only supports RAID0 for now. */
553 comp->llc_pattern = v1->lmm_pattern;
/* On-disk 0 means "use the default" for the stripe size. */
555 if (v1->lmm_stripe_size == 0)
556 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
558 comp->llc_stripe_size = v1->lmm_stripe_size;
/* All-ones stripe count maps to LLAPI_LAYOUT_WIDE, 0 to the default. */
560 if (v1->lmm_stripe_count == (typeof(v1->lmm_stripe_count))-1)
561 comp->llc_stripe_count = LLAPI_LAYOUT_WIDE;
562 else if (v1->lmm_stripe_count == 0)
563 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
565 comp->llc_stripe_count = v1->lmm_stripe_count;
/* All-ones stripe offset means no particular starting OST was requested. */
567 if (v1->lmm_stripe_offset ==
568 (typeof(v1->lmm_stripe_offset))-1)
569 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
571 comp->llc_stripe_offset = v1->lmm_stripe_offset;
/* Anything other than V1 is read through the v3 layout (pool name, objects
 * at the v3 offset). */
573 if (v1->lmm_magic != LOV_USER_MAGIC_V1) {
574 const struct lov_user_md_v3 *lumv3;
575 lumv3 = (struct lov_user_md_v3 *)v1;
/* snprintf() always terminates the destination.
 * NOTE(review): "%s" reads the source up to its NUL, so this relies on
 * lmm_pool_name being terminated inside the buffer -- confirm. */
576 snprintf(comp->llc_pool_name,
577 sizeof(comp->llc_pool_name),
578 "%s", lumv3->lmm_pool_name);
579 memcpy(comp->llc_objects, lumv3->lmm_objects,
580 obj_count * sizeof(lumv3->lmm_objects[0]));
582 const struct lov_user_md_v1 *lumv1;
583 lumv1 = (struct lov_user_md_v1 *)v1;
584 memcpy(comp->llc_objects, lumv1->lmm_objects,
585 obj_count * sizeof(lumv1->lmm_objects[0]));
/* NOTE(review): this reads llc_objects[0]; the guard in front of it is not
 * visible in this listing. It must not run when obj_count == 0, because
 * __llapi_comp_alloc(0) leaves llc_objects NULL -- confirm. */
589 comp->llc_stripe_offset =
590 comp->llc_objects[0].l_ost_idx;
/* The cursor ends up on the last component added. */
592 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
593 layout->llot_cur_comp = comp;
/* lum differs from lov_xattr only when a private copy was made above. */
597 if (lum != lov_xattr)
601 llapi_layout_free(layout);
606 __u32 llapi_pattern_to_lov(uint64_t pattern)
611 case LLAPI_LAYOUT_DEFAULT:
612 lov_pattern = LOV_PATTERN_RAID0;
614 case LLAPI_LAYOUT_RAID0:
615 lov_pattern = LOV_PATTERN_RAID0;
617 case LLAPI_LAYOUT_MDT:
618 lov_pattern = LOV_PATTERN_MDT;
620 case LLAPI_LAYOUT_OVERSTRIPING:
621 lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
624 lov_pattern = EINVAL;
631 * Convert the data from a llapi_layout to a newly allocated lov_user_md.
632 * The caller is responsible for freeing the returned pointer.
634 * \param[in] layout the layout to copy from
636 * \retval valid lov_user_md pointer on success
637 * \retval NULL if memory allocation fails or the layout is invalid
/* Serialises the layout into one heap buffer: for composite layouts a
 * lov_comp_md_v1 header plus entry table, followed by one V1/V3/SPECIFIC
 * blob per component appended with realloc(). */
639 static struct lov_user_md *
640 llapi_layout_to_lum(const struct llapi_layout *layout)
642 struct llapi_layout_comp *comp;
643 struct lov_comp_md_v1 *comp_v1 = NULL;
644 struct lov_comp_md_entry_v1 *ent;
645 struct lov_user_md *lum = NULL;
650 if (layout == NULL ||
651 list_empty((struct list_head *)&layout->llot_comp_list)) {
656 /* Allocate header of lov_comp_md_v1 if necessary */
657 if (layout->llot_is_composite) {
/* NOTE(review): the loop body is not visible in this listing -- presumably
 * it counts the components into comp_cnt. */
660 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
/* Header plus one entry descriptor per component; blobs come later. */
663 lum_size = sizeof(*comp_v1) + comp_cnt * sizeof(*ent);
664 lum = calloc(lum_size, 1);
669 comp_v1 = (struct lov_comp_md_v1 *)lum;
670 comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
671 comp_v1->lcm_size = lum_size;
672 comp_v1->lcm_layout_gen = 0;
673 comp_v1->lcm_flags = layout->llot_flags;
674 comp_v1->lcm_entry_count = comp_cnt;
/* Stored as "count - 1"; llapi_layout_get_by_xattr() adds the 1 back. */
675 comp_v1->lcm_mirror_count = layout->llot_mirror_count - 1;
679 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
680 struct lov_user_md *blob;
683 int i, obj_count = 0;
684 struct lov_user_ost_data *lmm_objects;
685 uint64_t pattern = comp->llc_pattern;
/* Blob flavour: SPECIFIC when an explicit OST list was requested, else V3
 * when a pool name is set, else V1. */
687 if ((pattern & LLAPI_LAYOUT_SPECIFIC) != 0) {
/* The object array must be large enough to cover every stripe. */
688 if (comp->llc_objects_count <
689 comp->llc_stripe_count) {
693 magic = LOV_USER_MAGIC_SPECIFIC;
694 obj_count = comp->llc_stripe_count;
/* The SPECIFIC bit is internal bookkeeping; strip it before the pattern is
 * converted below. */
695 pattern &= ~LLAPI_LAYOUT_SPECIFIC;
696 } else if (strlen(comp->llc_pool_name) != 0) {
697 magic = LOV_USER_MAGIC_V3;
699 magic = LOV_USER_MAGIC_V1;
701 /* All stripes must be specified when the pattern contains
702 * LLAPI_LAYOUT_SPECIFIC */
703 for (i = 0; i < obj_count; i++) {
704 if (comp->llc_objects[i].l_ost_idx ==
705 LLAPI_LAYOUT_IDX_MAX) {
711 blob_size = lov_user_md_size(obj_count, magic);
/* The realloc() result goes into the temporary "blob" first, so "lum" is
 * still valid if the call fails.
 * NOTE(review): the failure check and the "lum = blob" hand-over are not
 * visible in this listing; confirm. */
712 blob = realloc(lum, lum_size + blob_size);
/* The buffer may have moved: re-derive the header pointer, then point blob
 * at the freshly appended tail. */
718 comp_v1 = (struct lov_comp_md_v1 *)lum;
719 blob = (struct lov_user_md *)((char *)lum + lum_size);
720 lum_size += blob_size;
723 blob->lmm_magic = magic;
/* llapi_pattern_to_lov() reports an unsupported pattern by returning
 * EINVAL in place of a pattern value. */
724 blob->lmm_pattern = llapi_pattern_to_lov(pattern);
725 if (blob->lmm_pattern == EINVAL) {
/* Inverse of the mapping used when reading an xattr: DEFAULT becomes 0. */
730 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
731 blob->lmm_stripe_size = 0;
733 blob->lmm_stripe_size = comp->llc_stripe_size;
735 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
736 blob->lmm_stripe_count = 0;
737 else if (comp->llc_stripe_count == LLAPI_LAYOUT_WIDE)
738 blob->lmm_stripe_count = LOV_ALL_STRIPES;
740 blob->lmm_stripe_count = comp->llc_stripe_count;
742 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
743 blob->lmm_stripe_offset = -1;
745 blob->lmm_stripe_offset = comp->llc_stripe_offset;
747 if (magic == LOV_USER_MAGIC_V3 ||
748 magic == LOV_USER_MAGIC_SPECIFIC) {
749 struct lov_user_md_v3 *lumv3 =
750 (struct lov_user_md_v3 *)blob;
/* NOTE(review): strncpy() leaves the destination unterminated when the
 * source fills it completely; this relies on llc_pool_name always being
 * shorter than lmm_pool_name -- confirm. */
752 if (comp->llc_pool_name[0] != '\0') {
753 strncpy(lumv3->lmm_pool_name,
755 sizeof(lumv3->lmm_pool_name));
/* realloc() does not zero new memory, so an absent pool name is cleared
 * explicitly. */
757 memset(lumv3->lmm_pool_name, 0,
758 sizeof(lumv3->lmm_pool_name));
760 lmm_objects = lumv3->lmm_objects;
762 lmm_objects = blob->lmm_objects;
/* Only the OST index of each object is written. */
765 for (i = 0; i < obj_count; i++)
766 lmm_objects[i].l_ost_idx =
767 comp->llc_objects[i].l_ost_idx;
/* Fill in this component's descriptor in the entry table. */
769 if (layout->llot_is_composite) {
770 ent = &comp_v1->lcm_entries[ent_idx];
771 ent->lcme_id = comp->llc_id;
772 ent->lcme_flags = comp->llc_flags;
773 if (ent->lcme_flags & LCME_FL_NOSYNC)
774 ent->lcme_timestamp = comp->llc_timestamp;
775 ent->lcme_extent.e_start = comp->llc_extent.e_start;
776 ent->lcme_extent.e_end = comp->llc_extent.e_end;
777 ent->lcme_size = blob_size;
778 ent->lcme_offset = offset;
/* lcm_size started as the header size and grows with every blob. */
780 comp_v1->lcm_size += blob_size;
794 * Get the parent directory of a path.
796 * \param[in] path path to get parent of
797 * \param[out] buf buffer in which to store parent path
798 * \param[in] size size in bytes of buffer \a buf
static void get_parent_dir(const char *path, char *buf, size_t size)
{
	char *p;

	/*
	 * Writes the parent directory of path into buf, always
	 * NUL-terminated.  A path longer than size - 1 bytes is truncated
	 * before the parent is computed.  "/name" yields "/", and a path
	 * without any '/' yields ".".
	 */

	/* No room for even a terminator: leave buf untouched. */
	if (size == 0)
		return;

	/*
	 * strncpy(buf, path, size - 1) leaves buf unterminated when path
	 * does not fit, which makes the strrchr() below read past the end
	 * of the buffer.  snprintf() always terminates.
	 */
	snprintf(buf, size, "%s", path);
	p = strrchr(buf, '/');

	if (p != NULL && p == buf && size >= 2) {
		/* The only '/' is the leading one: the parent is the root. */
		buf[1] = '\0';
	} else if (p != NULL) {
		/* Cut at the last '/' to drop the final path element. */
		*p = '\0';
	} else if (size >= 2) {
		/* No directory part at all: use the current directory. */
		buf[0] = '.';
		buf[1] = '\0';
	}
}
816 * Substitute unspecified attribute values in \a layout with values
817 * from fs global settings. (lov.stripesize, lov.stripecount,
820 * \param[in] layout layout to inherit values from
821 * \param[in] path file path of the filesystem
/* Replaces every attribute still set to LLAPI_LAYOUT_DEFAULT, in every
 * component of the layout, with the defaults fetched for path. */
823 static void inherit_sys_attributes(struct llapi_layout *layout,
826 struct llapi_layout_comp *comp;
827 unsigned int ssize, scount, soffset;
/* NOTE(review): how a failing rc is handled is not visible in this listing;
 * ssize/scount/soffset are only meaningful if the call succeeded. */
830 rc = sattr_cache_get_defaults(NULL, path, &scount, &ssize, &soffset);
834 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
/* An unspecified pattern always becomes RAID0; it is not looked up. */
835 if (comp->llc_pattern == LLAPI_LAYOUT_DEFAULT)
836 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
837 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
838 comp->llc_stripe_size = ssize;
839 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
840 comp->llc_stripe_count = scount;
841 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
842 comp->llc_stripe_offset = soffset;
847 * Get the current component of \a layout.
849 * \param[in] layout layout to get current component
851 * \retval valid llapi_layout_comp pointer on success
852 * \retval NULL on error
/* Validates the layout handle and its cursor before any accessor uses the
 * current component. */
854 static struct llapi_layout_comp *
855 __llapi_layout_cur_comp(const struct llapi_layout *layout)
857 struct llapi_layout_comp *comp;
/* Reject NULL handles and anything not stamped by __llapi_layout_alloc(). */
859 if (layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
863 if (layout->llot_cur_comp == NULL) {
867 /* Verify data consistency */
/* Walks the whole component list to make sure the cursor still points at a
 * member of it.
 * NOTE(review): the statements for the match and no-match cases are not
 * visible in this listing. */
868 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
869 if (comp == layout->llot_cur_comp)
876 * Test if any attributes of \a layout are specified.
878 * \param[in] layout the layout to check
880 * \retval true any attributes are specified
881 * \retval false all attributes are unspecified
/* For a plain, single-mirror layout only the current component is inspected. */
883 static bool is_any_specified(const struct llapi_layout *layout)
885 struct llapi_layout_comp *comp;
887 comp = __llapi_layout_cur_comp(layout);
/* NOTE(review): the result returned on this branch is not visible in this
 * listing -- presumably true for composite or mirrored layouts; confirm. */
891 if (layout->llot_is_composite || layout->llot_mirror_count != 1)
/* Any attribute that differs from its "unspecified" value, or a non-empty
 * pool name, counts as specified. */
894 return comp->llc_pattern != LLAPI_LAYOUT_DEFAULT ||
895 comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT ||
896 comp->llc_stripe_count != LLAPI_LAYOUT_DEFAULT ||
897 comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT ||
898 strlen(comp->llc_pool_name);
902 * Get the striping layout for the file referenced by file descriptor \a fd.
904 * If the filesystem does not support the "lustre." xattr namespace, the
905 * file must be on a non-Lustre filesystem, so set errno to ENOTTY per
906 * convention. If the file has no "lustre.lov" data, the file will
907 * inherit default values, so return a default layout.
909 * If the kernel gives us back less than the expected amount of data,
910 * we fail with errno set to EINTR.
912 * \param[in] fd open file descriptor
913 * \param[in] flags flags to control how layout is retrieved
915 * \retval valid llapi_layout pointer on success
916 * \retval NULL if an error occurs
/* Reads the XATTR_LUSTRE_LOV xattr of fd and converts it to a layout.
 * NOTE(review): in the visible lines the "flags" argument is never passed
 * on; the flags handed to llapi_layout_get_by_xattr() depend only on whether
 * fd is a directory. Confirm this is intended. */
918 struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags)
921 struct lov_user_md *lum;
922 struct llapi_layout *layout = NULL;
/* Buffer is sized XATTR_SIZE_MAX rather than to the actual xattr length. */
926 lum_len = XATTR_SIZE_MAX;
927 lum = malloc(lum_len);
931 bytes_read = fgetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_len);
932 if (bytes_read < 0) {
/* NOTE(review): the statement for EOPNOTSUPP is not visible in this
 * listing; the function's header comment says errno becomes ENOTTY. */
933 if (errno == EOPNOTSUPP)
/* No layout xattr on the file: hand back an all-default plain layout. */
935 else if (errno == ENODATA)
936 layout = llapi_layout_alloc();
940 /* Directories may have a positive non-zero lum->lmm_stripe_count
941 * yet have an empty lum->lmm_objects array. For non-directories the
942 * amount of data returned from the kernel must be consistent
943 * with the stripe count. */
944 if (fstat(fd, &st) < 0)
/* Request the object-count consistency check for everything that is not a
 * directory. */
947 layout = llapi_layout_get_by_xattr(lum, bytes_read,
948 S_ISDIR(st.st_mode) ? 0 : LLAPI_LXF_CHECK);
955 * Get the expected striping layout for a file at \a path.
957 * Substitute expected inherited attribute values for unspecified
958 * attributes. Unspecified attributes may belong to directories and
959 * never-written-to files, and indicate that default values will be
960 * assigned when files are created or first written to. A default value
961 * is inherited from the parent directory if the attribute is specified
962 * there, otherwise it is inherited from the filesystem root.
963 * Unspecified attributes normally have the value LLAPI_LAYOUT_DEFAULT.
965 * The complete \a path need not refer to an existing file or directory,
966 * but some leading portion of it must reside within a lustre filesystem.
967 * A use case for this interface would be to obtain the literal striping
968 * values that would be assigned to a new file in a given directory.
970 * \param[in] path path for which to get the expected layout
972 * \retval valid llapi_layout pointer on success
973 * \retval NULL if an error occurs
/* Tries up to three donors in order: the path itself, its parent directory,
 * and the root of the filesystem containing it. The visible lines show a
 * donor being used when it has something specified and freed otherwise.
 * NOTE(review): the return statements are not visible in this listing. */
975 static struct llapi_layout *llapi_layout_expected(const char *path)
977 struct llapi_layout *path_layout = NULL;
978 char donor_path[PATH_MAX];
/* Donor 1: the path itself. A non-existent path (ENOENT) is tolerated. */
983 fd = open(path, O_RDONLY);
984 if (fd < 0 && errno != ENOENT)
990 path_layout = llapi_layout_get_by_fd(fd, 0);
996 if (path_layout == NULL) {
/* Only "no layout data" and "no such file" fall back to a default layout. */
997 if (errno != ENODATA && errno != ENOENT)
1000 path_layout = llapi_layout_alloc();
1001 if (path_layout == NULL)
/* Something is specified: fill the remaining gaps from the defaults for
 * path. */
1005 if (is_any_specified(path_layout)) {
1006 inherit_sys_attributes(path_layout, path);
/* Nothing specified on the path itself: discard and try the next donor. */
1010 llapi_layout_free(path_layout);
1012 rc = stat(path, &st);
1013 if (rc < 0 && errno != ENOENT)
1016 /* If path is not a directory or doesn't exist, inherit layout
1017 * from parent directory. */
1018 if ((rc == 0 && !S_ISDIR(st.st_mode)) ||
1019 (rc < 0 && errno == ENOENT)) {
/* Donor 2: the parent directory of path. */
1020 get_parent_dir(path, donor_path, sizeof(donor_path));
1021 path_layout = llapi_layout_get_by_path(donor_path, 0);
1022 if (path_layout != NULL) {
1023 if (is_any_specified(path_layout)) {
1024 inherit_sys_attributes(path_layout, donor_path);
1027 llapi_layout_free(path_layout);
1031 /* Inherit layout from the filesystem root. */
/* Donor 3: llapi_search_mounts() writes the donor location into donor_path.
 * NOTE(review): the check of rc is not visible in this listing. */
1032 rc = llapi_search_mounts(path, 0, donor_path, NULL);
1035 path_layout = llapi_layout_get_by_path(donor_path, 0);
1036 if (path_layout == NULL)
/* Last donor: defaults are applied without an is_any_specified() test. */
1039 inherit_sys_attributes(path_layout, donor_path);
1044 * Get the striping layout for the file at \a path.
1046 * If \a flags contains LAYOUT_GET_EXPECTED, substitute
1047 * expected inherited attribute values for unspecified attributes. See
1048 * llapi_layout_expected().
1050 * \param[in] path path for which to get the layout
1051 * \param[in] flags flags to control how layout is retrieved
1053 * \retval valid llapi_layout pointer on success
1054 * \retval NULL if an error occurs
1056 struct llapi_layout *llapi_layout_get_by_path(const char *path, uint32_t flags)
1058 struct llapi_layout *layout = NULL;
/* The "expected layout" lookup is a separate code path that never reaches
 * the open() below. */
1062 if (flags & LAYOUT_GET_EXPECTED)
1063 return llapi_layout_expected(path);
/* Plain lookup: open read-only and delegate to the fd-based variant.
 * NOTE(review): the open() failure check and the close of fd are not
 * visible in this listing; confirm. */
1065 fd = open(path, O_RDONLY);
1069 layout = llapi_layout_get_by_fd(fd, flags);
1078 * Get the layout for the file with FID \a fid in filesystem \a lustre_dir.
1080 * \param[in] lustre_dir path within Lustre filesystem containing \a fid
1081 * \param[in] fid Lustre identifier of file to get layout for
1083 * \retval valid llapi_layout pointer on success
1084 * \retval NULL if an error occurs
/* Opens the file by FID read-only and delegates to llapi_layout_get_by_fd().
 * The message level is saved on entry and put back immediately after the
 * open attempt, whether or not the open succeeded.
 * NOTE(review): the fd failure check and the close of fd are not visible in
 * this listing; confirm. */
1086 struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir,
1087 const struct lu_fid *fid,
1092 int saved_msg_level = llapi_msg_get_level();
1093 struct llapi_layout *layout = NULL;
1095 /* Prevent llapi internal routines from writing to console
1096 * while executing this function, then restore previous message
1098 llapi_msg_set_level(LLAPI_MSG_OFF);
1099 fd = llapi_open_by_fid(lustre_dir, fid, O_RDONLY);
1100 llapi_msg_set_level(saved_msg_level);
1105 layout = llapi_layout_get_by_fd(fd, flags);
1114 * Get the stripe count of \a layout.
1116 * \param[in] layout layout to get stripe count from
1117 * \param[out] count integer to store stripe count in
1119 * \retval 0 on success
1120 * \retval -1 if arguments are invalid
1122 int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
1125 struct llapi_layout_comp *comp;
/* Operates on the layout's current component. */
1127 comp = __llapi_layout_cur_comp(layout);
1131 if (count == NULL) {
/* Returned verbatim, so the caller may receive LLAPI_LAYOUT_DEFAULT or
 * LLAPI_LAYOUT_WIDE rather than a literal stripe count. */
1136 *count = comp->llc_stripe_count;
1142 * The llapi_layout API functions have these extra validity checks since
1143 * they use intuitively named macros to denote special behavior, whereas
1144 * the old API uses 0 and -1.
1147 static bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
1149 return stripe_count == LLAPI_LAYOUT_DEFAULT ||
1150 stripe_count == LLAPI_LAYOUT_WIDE ||
1151 (stripe_count != 0 && stripe_count != -1 &&
1152 llapi_stripe_count_is_valid(stripe_count));
static bool llapi_layout_extension_size_is_valid(uint64_t ext_size)
{
	/* An extension size has no "default" spelling: zero is never valid. */
	if (ext_size == 0)
		return false;

	/* Must be aligned ... */
	if (!llapi_stripe_size_is_aligned(ext_size))
		return false;

	/* ... and must not exceed the maximum. */
	return !llapi_stripe_size_is_too_big(ext_size);
}
1162 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
1164 return stripe_size == LLAPI_LAYOUT_DEFAULT ||
1165 (stripe_size != 0 &&
1166 llapi_stripe_size_is_aligned(stripe_size) &&
1167 !llapi_stripe_size_is_too_big(stripe_size));
1170 static bool llapi_layout_stripe_index_is_valid(int64_t stripe_index)
1172 return stripe_index == LLAPI_LAYOUT_DEFAULT ||
1173 (stripe_index >= 0 &&
1174 llapi_stripe_index_is_valid(stripe_index));
1178 * Set the stripe count of \a layout.
1180 * \param[in] layout layout to set stripe count in
1181 * \param[in] count value to be set
1183 * \retval 0 on success
1184 * \retval -1 if arguments are invalid
1186 int llapi_layout_stripe_count_set(struct llapi_layout *layout,
1189 struct llapi_layout_comp *comp;
/* Operates on the layout's current component. */
1191 comp = __llapi_layout_cur_comp(layout);
/* Uses the llapi_layout-specific validator, which accepts the named
 * DEFAULT/WIDE values and refuses raw 0 and -1. */
1195 if (!llapi_layout_stripe_count_is_valid(count)) {
1200 comp->llc_stripe_count = count;
1206 * Get the stripe/extension size of \a layout.
1208 * \param[in] layout layout to get stripe size from
1209 * \param[out] size integer to store stripe size in
1210 * \param[in] extension flag set if the extension size is requested
1212 * \retval 0 on success
1213 * \retval -1 if arguments are invalid
/* Shared implementation behind llapi_layout_stripe_size_get() and
 * llapi_layout_extension_size_get(). */
1215 static int layout_stripe_size_get(const struct llapi_layout *layout,
1216 uint64_t *size, bool extension)
1218 struct llapi_layout_comp *comp;
1221 comp = __llapi_layout_cur_comp(layout);
/* The requested flavour must match the component kind: the condition below
 * is true when exactly one of comp_ext / extension is set. */
1230 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1231 if ((comp_ext && !extension) || (!comp_ext && extension)) {
/* For extension components the stored value is scaled back up by
 * SEL_UNIT_SIZE; layout_stripe_size_set() divides by the same constant. */
1236 *size = comp->llc_stripe_size;
1237 if (comp->llc_flags & LCME_FL_EXTENSION)
1238 *size *= SEL_UNIT_SIZE;
/* Stripe-size flavour of layout_stripe_size_get(): extension == false. */
1243 int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
1246 return layout_stripe_size_get(layout, size, false);
/* Extension-size flavour of layout_stripe_size_get(): extension == true. */
1249 int llapi_layout_extension_size_get(const struct llapi_layout *layout,
1252 return layout_stripe_size_get(layout, size, true);
1256 * Set the stripe/extension size of \a layout.
1258 * \param[in] layout layout to set stripe size in
1259 * \param[in] size value to be set
1260 * \param[in] extension flag set if an extension size is passed
1262 * \retval 0 on success
1263 * \retval -1 if arguments are invalid
/* Shared implementation behind llapi_layout_stripe_size_set() and
 * llapi_layout_extension_size_set(). */
1265 static int layout_stripe_size_set(struct llapi_layout *layout,
1266 uint64_t size, bool extension)
1268 struct llapi_layout_comp *comp;
1271 comp = __llapi_layout_cur_comp(layout);
/* The requested flavour must match the component kind. */
1275 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1276 if ((comp_ext && !extension) || (!comp_ext && extension)) {
/* NOTE(review): the condition guarding this division is not visible in this
 * listing -- presumably it applies to extension components only (the getter
 * multiplies only for those). Note that validation below then sees the
 * already-divided value; confirm that is intended. */
1282 size /= SEL_UNIT_SIZE;
1284 if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) ||
1285 (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) {
1290 comp->llc_stripe_size = size;
/* Stripe-size flavour of layout_stripe_size_set(): extension == false. */
1294 int llapi_layout_stripe_size_set(struct llapi_layout *layout,
1297 return layout_stripe_size_set(layout, size, false);
/* Extension-size flavour of layout_stripe_size_set(): extension == true. */
1300 int llapi_layout_extension_size_set(struct llapi_layout *layout,
1303 return layout_stripe_size_set(layout, size, true);
1307 * Get the RAID pattern of \a layout.
1309 * \param[in] layout layout to get pattern from
1310 * \param[out] pattern integer to store pattern in
1312 * \retval 0 on success
1313 * \retval -1 if arguments are invalid
1315 int llapi_layout_pattern_get(const struct llapi_layout *layout,
1318 struct llapi_layout_comp *comp;
/* Operates on the layout's current component. */
1320 comp = __llapi_layout_cur_comp(layout);
1324 if (pattern == NULL) {
/* Returned verbatim: llapi_layout_pattern_set() and
 * llapi_layout_ost_index_set() may have left the LLAPI_LAYOUT_SPECIFIC bit
 * set in this value. */
1329 *pattern = comp->llc_pattern;
1335 * Set the pattern of \a layout.
1337 * \param[in] layout layout to set pattern in
1338 * \param[in] pattern value to be set
1340 * \retval 0 on success
1341 * \retval -1 if arguments are invalid or RAID pattern
1344 int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
1346 struct llapi_layout_comp *comp;
1348 comp = __llapi_layout_cur_comp(layout);
/* Only these four pattern values are accepted from callers. */
1352 if (pattern != LLAPI_LAYOUT_DEFAULT &&
1353 pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
1354 && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
/* Keep an existing LLAPI_LAYOUT_SPECIFIC bit, which is managed by
 * llapi_layout_ost_index_set(), while replacing the rest of the pattern. */
1359 comp->llc_pattern = pattern |
1360 (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC);
1365 static inline int stripe_number_roundup(int stripe_number)
1367 unsigned int round_up = (stripe_number + 8) & ~7;
1368 return round_up > LOV_MAX_STRIPE_COUNT ?
1369 LOV_MAX_STRIPE_COUNT : round_up;
1373 * Set the OST index of stripe number \a stripe_number to \a ost_index.
1375 * If only the starting stripe's OST index is specified, then this can use
1376 * the normal LOV_MAGIC_{V1,V3} layout type. If multiple OST indices are
1377 * given, then allocate an array to hold the list of indices and ensure that
1378 * the LOV_USER_MAGIC_SPECIFIC layout is used when creating the file.
1380 * \param[in] layout layout to set OST index in
1381 * \param[in] stripe_number stripe number to set index for
1382 * \param[in] ost_index the index to set
1384 * \retval 0 on success
1385 * \retval -1 if arguments are invalid or an unsupported stripe number
1386 * was specified, error returned in errno
/*
 * Record \a ost_index for stripe \a stripe_number in the layout's current
 * component.  Stripe 0 combined with LLAPI_LAYOUT_DEFAULT clears the
 * SPECIFIC flag; any other stripe in [0, LOV_MAX_STRIPE_COUNT) stores the
 * index in llc_objects[] and grows llc_stripe_count to cover it.
 */
1388 int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
1391 struct llapi_layout_comp *comp;
1393 comp = __llapi_layout_cur_comp(layout);
1397 if (!llapi_layout_stripe_index_is_valid(ost_index)) {
/* Default index on the first stripe: drop the SPECIFIC flag and call
 * __llapi_comp_objects_realloc() with a count of 0. */
1402 if (stripe_number == 0 && ost_index == LLAPI_LAYOUT_DEFAULT) {
1403 comp->llc_stripe_offset = ost_index;
1404 comp->llc_pattern &= ~LLAPI_LAYOUT_SPECIFIC;
1405 __llapi_comp_objects_realloc(comp, 0);
1406 } else if (stripe_number >= 0 &&
1407 stripe_number < LOV_MAX_STRIPE_COUNT) {
/* Values at or above LLAPI_LAYOUT_IDX_MAX are not stored as an index. */
1408 if (ost_index >= LLAPI_LAYOUT_IDX_MAX) {
1413 /* Preallocate a few more stripes to avoid realloc() overhead.*/
1414 if (__llapi_comp_objects_realloc(comp,
1415 stripe_number_roundup(stripe_number)) < 0)
1418 comp->llc_objects[stripe_number].l_ost_idx = ost_index;
/* The first stripe's index is also kept as the component's stripe offset. */
1420 if (stripe_number == 0)
1421 comp->llc_stripe_offset = ost_index;
1423 comp->llc_pattern |= LLAPI_LAYOUT_SPECIFIC;
/* Make sure the stripe count covers the stripe that was just set. */
1425 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT ||
1426 comp->llc_stripe_count <= stripe_number)
1427 comp->llc_stripe_count = stripe_number + 1;
1437 * Get the OST index associated with stripe \a stripe_number.
1439 * Stripes are indexed starting from zero.
1441 * \param[in] layout layout to get index from
1442 * \param[in] stripe_number stripe number to get index for
1443 * \param[out] index integer to store index in
1445 * \retval 0 on success
1446 * \retval -1 if arguments are invalid
1448 int llapi_layout_ost_index_get(const struct llapi_layout *layout,
1449 uint64_t stripe_number, uint64_t *index)
1451 struct llapi_layout_comp *comp;
1453 comp = __llapi_layout_cur_comp(layout);
1457 if (index == NULL) {
1462 if (stripe_number >= comp->llc_stripe_count ||
1463 stripe_number >= comp->llc_objects_count) {
1468 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
1469 *index = LLAPI_LAYOUT_DEFAULT;
1471 *index = comp->llc_objects[stripe_number].l_ost_idx;
1478 * Get the pool name of layout \a layout.
1480 * \param[in] layout layout to get pool name from
1481 * \param[out] dest buffer to store pool name in
1482 * \param[in] n size in bytes of buffer \a dest
1484 * \retval 0 on success
1485 * \retval -1 if arguments are invalid
1487 int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
1490 struct llapi_layout_comp *comp;
1492 comp = __llapi_layout_cur_comp(layout);
1501 strncpy(dest, comp->llc_pool_name, n);
1507 * Set the name of the pool of layout \a layout.
1509 * \param[in] layout layout to set pool name in
1510 * \param[in] pool_name pool name to set
1512 * \retval 0 on success
1513 * \retval -1 if arguments are invalid or pool name is too long
1515 int llapi_layout_pool_name_set(struct llapi_layout *layout,
1516 const char *pool_name)
1518 struct llapi_layout_comp *comp;
1521 comp = __llapi_layout_cur_comp(layout);
1525 if (pool_name == NULL) {
1530 /* Strip off any 'fsname.' portion. */
1531 ptr = strchr(pool_name, '.');
1533 pool_name = ptr + 1;
1535 if (strlen(pool_name) > LOV_MAXPOOLNAME) {
1540 strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name));
1546 * Open and possibly create a file with a given \a layout.
1548 * If \a layout is NULL this function acts as a simple wrapper for
1549 * open(). By convention, ENOTTY is returned in errno if \a path
1550 * refers to a non-Lustre file.
1552 * \param[in] path name of the file to open
1553 * \param[in] open_flags open() flags
1554 * \param[in] mode permissions to create file, filtered by umask
1555 * \param[in] layout layout to create new file with
1557 * \retval non-negative file descriptor on successful open
1558 * \retval -1 if an error occurred
/*
 * Open \a path and, when a layout is supplied, convert it with
 * llapi_layout_to_lum() and apply the result to the new descriptor through
 * the XATTR_LUSTRE_LOV extended attribute.
 */
1560 int llapi_layout_file_open(const char *path, int open_flags, mode_t mode,
1561 const struct llapi_layout *layout)
1566 struct lov_user_md *lum;
1570 (layout != NULL && layout->llot_magic != LLAPI_LAYOUT_MAGIC)) {
1575 /* Object creation must be postponed until after layout attributes
1576 * have been applied. */
1577 if (layout != NULL && (open_flags & O_CREAT))
1578 open_flags |= O_LOV_DELAY_CREATE;
1580 fd = open(path, open_flags, mode);
/* Without a layout, or when open() failed, there is no layout to apply. */
1582 if (layout == NULL || fd < 0)
1585 lum = llapi_layout_to_lum(layout);
/* The xattr payload size depends on the magic of the generated lum. */
1594 if (lum->lmm_magic == LOV_USER_MAGIC_COMP_V1)
1595 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
1596 else if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC)
1597 lum_size = lov_user_md_size(lum->lmm_stripe_count,
1600 lum_size = lov_user_md_size(0, lum->lmm_magic);
1602 rc = fsetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_size, 0);
/* Report EOPNOTSUPP as ENOTTY, the errno this API documents for
 * non-Lustre files. */
1611 errno = errno == EOPNOTSUPP ? ENOTTY : errno;
/**
 * Create a file with a given \a layout.
 *
 * O_CREAT and O_EXCL are always added to \a open_flags, so a successful
 * return means the file was newly created by this call.
 *
 * \param[in] path		name of the file to open
 * \param[in] open_flags	open() flags
 * \param[in] mode		permissions to create new file with
 * \param[in] layout		layout to create new file with
 *
 * \retval		non-negative file descriptor on successful open
 * \retval		-1 if an error occurred
 */
int llapi_layout_file_create(const char *path, int open_flags, int mode,
			     const struct llapi_layout *layout)
{
	const int create_flags = open_flags | O_CREAT | O_EXCL;

	return llapi_layout_file_open(path, create_flags, mode, layout);
}
1637 int llapi_layout_flags_get(struct llapi_layout *layout, uint32_t *flags)
1639 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1644 *flags = layout->llot_flags;
1649 * Set flags to the header of a component layout.
1651 int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
1653 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1658 layout->llot_flags = flags;
/*
 * Translate the bits of \a flags selected by LCM_FL_FLR_MASK into a
 * constant string.
 */
1662 const char *llapi_layout_flags_string(uint32_t flags)
/* Bits outside LCM_FL_FLR_MASK are ignored when choosing the case. */
1664 switch (flags & LCM_FL_FLR_MASK) {
1667 case LCM_FL_WRITE_PENDING:
1669 case LCM_FL_SYNC_PENDING:
1676 const __u16 llapi_layout_string_flags(char *string)
1678 if (strncmp(string, "ro", strlen(string)) == 0)
1679 return LCM_FL_RDONLY;
1680 if (strncmp(string, "wp", strlen(string)) == 0)
1681 return LCM_FL_WRITE_PENDING;
1682 if (strncmp(string, "sp", strlen(string)) == 0)
1683 return LCM_FL_SYNC_PENDING;
1689 * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
1690 * @count: Mirror count value to be checked.
1692 * This function checks the validity of mirror count.
1694 * Return: true on success or false on failure.
1696 static bool llapi_layout_mirror_count_is_valid(uint16_t count)
1698 return count >= 0 && count <= LUSTRE_MIRROR_COUNT_MAX;
1702 * llapi_layout_mirror_count_get() - Get mirror count from the header of
1704 * @layout: Layout to get mirror count from.
1705 * @count: Returned mirror count value.
1707 * This function gets mirror count from the header of a layout.
1709 * Return: 0 on success or -1 on failure.
1711 int llapi_layout_mirror_count_get(struct llapi_layout *layout,
1714 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1719 *count = layout->llot_mirror_count;
1724 * llapi_layout_mirror_count_set() - Set mirror count to the header of a layout.
1725 * @layout: Layout to set mirror count in.
1726 * @count: Mirror count value to be set.
1728 * This function sets mirror count to the header of a layout.
1730 * Return: 0 on success or -1 on failure.
1732 int llapi_layout_mirror_count_set(struct llapi_layout *layout,
1735 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1740 if (!llapi_layout_mirror_count_is_valid(count)) {
1745 layout->llot_mirror_count = count;
1750 * Fetch the start and end offset of the current layout component.
1752 * \param[in] layout the layout component
1753 * \param[out] start extent start, inclusive
1754 * \param[out] end extent end, exclusive
1756 * \retval 0 on success
1757 * \retval <0 if error occurs
1759 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
1760 uint64_t *start, uint64_t *end)
1762 struct llapi_layout_comp *comp;
1764 comp = __llapi_layout_cur_comp(layout);
1768 if (start == NULL || end == NULL) {
1773 *start = comp->llc_extent.e_start;
1774 *end = comp->llc_extent.e_end;
1780 * Set the layout extent of a layout.
1782 * \param[in] layout the layout to be set
1783 * \param[in] start extent start, inclusive
1784 * \param[in] end extent end, exclusive
1786 * \retval 0 on success
1787 * \retval <0 if error occurs
/*
 * Store [start, end) in the layout's current component after comparing it
 * with the extents of the neighbouring components on the list, then mark
 * the layout as composite.
 */
1789 int llapi_layout_comp_extent_set(struct llapi_layout *layout,
1790 uint64_t start, uint64_t end)
1792 struct llapi_layout_comp *prev, *next, *comp;
1794 comp = __llapi_layout_cur_comp(layout);
1804 * We need to make sure the extent to be set is valid: the new
1805 * extent must be adjacent with the prev & next component.
/* comp is not the first list entry: compare against its predecessor.
 * A start of 0 is exempt from the comparison. */
1807 if (comp->llc_list.prev != &layout->llot_comp_list) {
1808 prev = list_entry(comp->llc_list.prev, typeof(*prev),
1810 if (start != 0 && start != prev->llc_extent.e_end) {
/* comp is not the last list entry: compare against its successor.
 * A successor starting at 0 is exempt from the comparison. */
1816 if (comp->llc_list.next != &layout->llot_comp_list) {
1817 next = list_entry(comp->llc_list.next, typeof(*next),
1819 if (next->llc_extent.e_start != 0 &&
1820 end != next->llc_extent.e_start) {
/* Setting an extent always turns the layout into a composite one. */
1826 comp->llc_extent.e_start = start;
1827 comp->llc_extent.e_end = end;
1828 layout->llot_is_composite = true;
1834 * Gets the attribute flags of the current component.
1836 * \param[in] layout the layout component
1837 * \param[out] flags stored the returned component flags
1839 * \retval 0 on success
1840 * \retval <0 if error occurs
1842 int llapi_layout_comp_flags_get(const struct llapi_layout *layout,
1845 struct llapi_layout_comp *comp;
1847 comp = __llapi_layout_cur_comp(layout);
1851 if (flags == NULL) {
1856 *flags = comp->llc_flags;
1862 * Sets the specified flags of the current component leaving other flags as-is.
1864 * \param[in] layout the layout component
1865 * \param[in] flags component flags to be set
1867 * \retval 0 on success
1868 * \retval <0 if error occurs
1870 int llapi_layout_comp_flags_set(struct llapi_layout *layout, uint32_t flags)
1872 struct llapi_layout_comp *comp;
1874 comp = __llapi_layout_cur_comp(layout);
1878 comp->llc_flags |= flags;
1884 * Clears the flags specified in the flags leaving other flags as-is.
1886 * \param[in] layout the layout component
1887 * \param[in] flags component flags to be cleared
1889 * \retval 0 on success
1890 * \retval <0 if error occurs
1892 int llapi_layout_comp_flags_clear(struct llapi_layout *layout,
1895 struct llapi_layout_comp *comp;
1897 comp = __llapi_layout_cur_comp(layout);
1901 comp->llc_flags &= ~flags;
1907 * Fetches the file-unique component ID of the current layout component.
1909 * \param[in] layout the layout component
1910 * \param[out] id stored the returned component ID
1912 * \retval 0 on success
1913 * \retval <0 if error occurs
/*
 * Works on the layout's current component, as returned by
 * __llapi_layout_cur_comp().
 */
1915 int llapi_layout_comp_id_get(const struct llapi_layout *layout, uint32_t *id)
1917 struct llapi_layout_comp *comp;
1919 comp = __llapi_layout_cur_comp(layout);
1933 * Return the mirror id of the current layout component.
1935 * \param[in] layout the layout component
1936 * \param[out] id stored the returned mirror ID
1938 * \retval 0 on success
1939 * \retval <0 if error occurs
1941 int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id)
1943 struct llapi_layout_comp *comp;
1945 comp = __llapi_layout_cur_comp(layout);
1954 *id = mirror_id_of(comp->llc_id);
1960 * Adds a component to \a layout, the new component will be added to
1961 * the tail of components list and it'll inherit attributes of existing
1962 * ones. The \a layout will change its current component pointer to
1963 * the newly added component, and it'll be turned into a composite
1964 * layout if it was not before the adding.
1966 * \param[in] layout existing composite or plain layout
1968 * \retval 0 on success
1969 * \retval <0 if error occurs
/*
 * Allocate a new component, start its extent where the current tail of
 * the component list ends, append it to the list and make it the current
 * component of a now-composite layout.
 */
1971 int llapi_layout_comp_add(struct llapi_layout *layout)
1973 struct llapi_layout_comp *last, *comp, *new;
1975 comp = __llapi_layout_cur_comp(layout);
1979 new = __llapi_comp_alloc(0);
/* last is the component currently at the tail of the list. */
1983 last = list_entry(layout->llot_comp_list.prev, typeof(*last),
/* The freshly allocated extent must end beyond the tail's end;
 * otherwise the new component is freed again. */
1986 if (new->llc_extent.e_end <= last->llc_extent.e_end) {
1987 __llapi_comp_free(new);
/* The new component begins exactly where the tail ends. */
1991 new->llc_extent.e_start = last->llc_extent.e_end;
1993 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1994 layout->llot_cur_comp = new;
1995 layout->llot_is_composite = true;
2000 * Adds a first component of a mirror to \a layout.
2001 * The \a layout will change it's current component pointer to
2002 * the newly added component, and it'll be turned into a composite
2003 * layout if it was not before the adding.
2005 * \param[in] layout existing composite or plain layout
2007 * \retval 0 on success
2008 * \retval <0 if error occurs
2010 int llapi_layout_add_first_comp(struct llapi_layout *layout)
2012 struct llapi_layout_comp *comp, *new;
2014 comp = __llapi_layout_cur_comp(layout);
2018 new = __llapi_comp_alloc(0);
2022 new->llc_extent.e_start = 0;
2024 list_add_tail(&new->llc_list, &layout->llot_comp_list);
2025 layout->llot_cur_comp = new;
2026 layout->llot_is_composite = true;
2032 * Deletes current component from the composite layout. The component
2033 * to be deleted must be the tail of components list, and it can't be
2034 * the only component in the layout.
2036 * \param[in] layout composite layout
2038 * \retval 0 on success
2039 * \retval <0 if error occurs
/*
 * Remove the current component from a composite layout and free it; the
 * component that preceded it becomes the current one.
 */
2041 int llapi_layout_comp_del(struct llapi_layout *layout)
2043 struct llapi_layout_comp *comp;
2045 comp = __llapi_layout_cur_comp(layout);
2049 if (!layout->llot_is_composite) {
2054 /* It must be the tail of the list (for PFL, can be relaxed
2055 * once we get mirrored components) */
2056 if (comp->llc_list.next != &layout->llot_comp_list) {
2060 /* It can't be the only one on the list */
2061 if (comp->llc_list.prev == &layout->llot_comp_list) {
/* Re-point the cursor at the predecessor before unlinking comp, so
 * llot_cur_comp never refers to freed memory. */
2066 layout->llot_cur_comp =
2067 list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
2068 list_del_init(&comp->llc_list);
2069 __llapi_comp_free(comp);
2075 * Move the current component pointer to the component with
2076 * specified component ID.
2078 * \param[in] layout composite layout
2079 * \param[in] id component ID
2081 * \retval =0 : moved successfully
2082 * \retval <0 if error occurs
/*
 * Walk the component list of a composite layout and make the component
 * whose llc_id equals \a comp_id the current one.
 */
2084 int llapi_layout_comp_use_id(struct llapi_layout *layout, uint32_t comp_id)
2086 struct llapi_layout_comp *comp;
2088 comp = __llapi_layout_cur_comp(layout);
2090 return -1; /* use previously set errno */
2092 if (!layout->llot_is_composite) {
/* LCME_ID_INVAL is handled separately, before the list is searched. */
2097 if (comp_id == LCME_ID_INVAL) {
/* comp is reused as the list cursor from here on. */
2102 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
2103 if (comp->llc_id == comp_id) {
2104 layout->llot_cur_comp = comp;
2113 * Move the current component pointer to a specified position.
2115 * \param[in] layout composite layout
2116 * \param[in] pos the position to be moved, it can be:
2117 * LLAPI_LAYOUT_COMP_USE_FIRST: use first component
2118 * LLAPI_LAYOUT_COMP_USE_LAST: use last component
2119 * LLAPI_LAYOUT_COMP_USE_NEXT: use component after current
2120 * LLAPI_LAYOUT_COMP_USE_PREV: use component before current
2122 * \retval =0 : moved successfully
2123 * \retval =1 : at last component with NEXT, at first component with PREV
2124 * \retval <0 if error occurs
/*
 * Move the layout's current-component cursor to the first, last, next or
 * previous component according to \a pos.
 */
2126 int llapi_layout_comp_use(struct llapi_layout *layout,
2127 enum llapi_layout_comp_use pos)
2129 struct llapi_layout_comp *comp, *head, *tail;
2131 comp = __llapi_layout_cur_comp(layout);
/* Non-composite layouts: FIRST and LAST are treated separately from
 * the other positions. */
2135 if (!layout->llot_is_composite) {
2136 if (pos == LLAPI_LAYOUT_COMP_USE_FIRST ||
2137 pos == LLAPI_LAYOUT_COMP_USE_LAST)
/* head and tail are the first and last components on the list. */
2143 head = list_entry(layout->llot_comp_list.next, typeof(*head), llc_list);
2144 tail = list_entry(layout->llot_comp_list.prev, typeof(*tail), llc_list);
2146 case LLAPI_LAYOUT_COMP_USE_FIRST:
2147 layout->llot_cur_comp = head;
2149 case LLAPI_LAYOUT_COMP_USE_NEXT:
2154 layout->llot_cur_comp = list_entry(comp->llc_list.next,
2155 typeof(*comp), llc_list);
2157 case LLAPI_LAYOUT_COMP_USE_LAST:
2158 layout->llot_cur_comp = tail;
2160 case LLAPI_LAYOUT_COMP_USE_PREV:
2165 layout->llot_cur_comp = list_entry(comp->llc_list.prev,
2166 typeof(*comp), llc_list);
2177 * Add layout component(s) to an existing file.
2179 * \param[in] path The path name of the file
2180 * \param[in] layout The layout component(s) to be added
/*
 * Convert \a layout with llapi_layout_to_lum(), open \a path read-write
 * and submit the result through the XATTR_LUSTRE_LOV ".add" extended
 * attribute.
 */
2182 int llapi_layout_file_comp_add(const char *path,
2183 const struct llapi_layout *layout)
2185 int rc, fd, lum_size, tmp_errno = 0;
2186 struct lov_user_md *lum;
2188 if (path == NULL || layout == NULL ||
2189 layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
2194 lum = llapi_layout_to_lum(layout);
/* The converted layout is checked for the composite (COMP_V1) magic. */
2198 if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) {
/* A composite lum records its own total size in lcm_size. */
2203 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2205 fd = open(path, O_RDWR);
2212 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0);
2227 * Delete component(s) by the specified component id or component flags
2228 * from an existing file.
2230 * \param[in] path path name of the file
2231 * \param[in] id unique component ID
2232 * \param[in] flags flags: LCME_FL_* or;
2233 * negative flags: (LCME_FL_NEG|LCME_FL_*)
/*
 * Build a temporary single-component layout describing what to delete,
 * convert it with llapi_layout_to_lum() and submit it through the
 * XATTR_LUSTRE_LOV ".del" extended attribute of \a path.
 */
2235 int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags)
2237 int rc, fd, lum_size;
2238 struct llapi_layout *layout;
2239 struct llapi_layout_comp *comp;
2240 struct lov_user_md *lum;
/* id may not exceed LCME_ID_MAX and flags may only contain bits from
 * LCME_KNOWN_FLAGS. */
2242 if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) {
2247 /* Can only specify ID or flags, not both. */
2248 if (id != 0 && flags != 0) {
2253 layout = llapi_layout_alloc();
/* Setting an extent marks the temporary layout as composite, so the
 * conversion below is read back through struct lov_comp_md_v1. */
2257 llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF);
2258 comp = __llapi_layout_cur_comp(layout);
2260 llapi_layout_free(layout);
2265 comp->llc_flags = flags;
2267 lum = llapi_layout_to_lum(layout);
2269 llapi_layout_free(layout);
2272 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2274 fd = open(path, O_RDWR);
2280 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0);
/* errno is saved in a local right after the xattr call. */
2282 int tmp_errno = errno;
2291 llapi_layout_free(layout);
2296 * Change flags or other parameters of the component(s) by component ID of an
2297 * existing file. The component to be modified is specified by the
2298 * comp->lcme_id value, which must be an unique component ID. The new
2299 * attributes are passed in by @comp and @valid is used to specify which
2300 * attributes in the component are going to be changed.
2302 * \param[in] path path name of the file
2303 * \param[in] ids An array of component IDs
2304 * \param[in] flags flags: LCME_FL_* or;
2305 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2306 * \param[in] count Number of elements in ids and flags array
/*
 * Validate every (ids[i], flags[i]) pair, build a temporary composite
 * layout with one component per pair, flush the file and submit the
 * layout through the XATTR_LUSTRE_LOV ".set.flags" extended attribute.
 *
 * NOTE(review): the comment above this function mentions @comp and @valid,
 * which do not match the parameter names in this signature - confirm and
 * update.
 */
2308 int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
2311 int rc = -1, fd = -1, i;
2313 struct llapi_layout *layout;
2314 struct llapi_layout_comp *comp;
2315 struct lov_user_md *lum = NULL;
/* First pass: check all pairs before anything is allocated. */
2325 for (i = 0; i < count; i++) {
/* Both the ID and the flags of a pair must be non-zero. */
2326 if (!ids[i] || !flags[i]) {
2331 if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
2336 /* do not allow to set or clear INIT flag */
2337 if (flags[i] & LCME_FL_INIT) {
2343 layout = __llapi_layout_alloc();
/* Second pass: one component per pair, appended in input order. */
2347 layout->llot_is_composite = true;
2348 for (i = 0; i < count; i++) {
2349 comp = __llapi_comp_alloc(0);
2353 comp->llc_id = ids[i];
2354 comp->llc_flags = flags[i];
2356 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
2357 layout->llot_cur_comp = comp;
2360 lum = llapi_layout_to_lum(layout);
2364 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2366 fd = open(path, O_RDWR);
2370 /* flush cached pages from clients */
2371 rc = llapi_file_flush(fd);
2378 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
/* errno is saved in a local right after the xattr call. */
2386 int tmp_errno = errno;
2393 llapi_layout_free(layout);
2398 * Check if the file layout is composite.
2400 * \param[in] layout the file layout to check
2402 * \retval true composite
2403 * \retval false not composite
2405 bool llapi_layout_is_composite(struct llapi_layout *layout)
2407 return layout->llot_is_composite;
2411 * Iterate over every component in the @layout and call callback function @cb.
2413 * \param[in] layout component layout list.
2414 * \param[in] cb callback for each component
2415 * \param[in] cbdata callback data
2417 * \retval < 0 error happens during the iteration
2418 * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error
2419 * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration
/*
 * Position the layout on its first component, then alternate between
 * calling \a cb and advancing to the next component.
 */
2421 int llapi_layout_comp_iterate(struct llapi_layout *layout,
2422 llapi_layout_iter_cb cb, void *cbdata)
2426 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2431 * make sure on success llapi_layout_comp_use() API returns 0 with
/* The callback sees the layout with its cursor on the component being
 * visited; its return value is compared with LLAPI_LAYOUT_ITER_CONT. */
2437 rc = cb(layout, cbdata);
2438 if (rc != LLAPI_LAYOUT_ITER_CONT)
2441 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2444 else if (rc == 1) /* reached the last comp */
2445 return LLAPI_LAYOUT_ITER_CONT;
2452 * llapi_layout_merge() - Merge a composite layout into another one.
2453 * @dst_layout: Destination composite layout.
2454 * @src_layout: Source composite layout.
2456 * This function copies all of the components from @src_layout and
2457 * appends them to @dst_layout.
2459 * Return: 0 on success or -1 on failure.
/*
 * Copy every component of \a src_layout and append the copies to
 * *dst_layout, allocating the destination layout first when *dst_layout
 * is NULL.
 *
 * NOTE(review): new_layout starts out as the caller's *dst_layout and is
 * passed to llapi_layout_free() at the end of this function; confirm that
 * callers do not reuse *dst_layout after a failure.
 */
2461 int llapi_layout_merge(struct llapi_layout **dst_layout,
2462 const struct llapi_layout *src_layout)
2464 struct llapi_layout *new_layout = *dst_layout;
2465 struct llapi_layout_comp *new = NULL;
2466 struct llapi_layout_comp *comp = NULL;
/* The cast drops the const qualifier for list_empty(). */
2469 if (src_layout == NULL ||
2470 list_empty((struct list_head *)&src_layout->llot_comp_list))
2473 if (new_layout == NULL) {
2474 new_layout = __llapi_layout_alloc();
2475 if (new_layout == NULL) {
2481 list_for_each_entry(comp, &src_layout->llot_comp_list, llc_list) {
2482 new = __llapi_comp_alloc(0);
/* Copy the plain striping attributes field by field. */
2488 new->llc_pattern = comp->llc_pattern;
2489 new->llc_stripe_size = comp->llc_stripe_size;
2490 new->llc_stripe_count = comp->llc_stripe_count;
2491 new->llc_stripe_offset = comp->llc_stripe_offset;
2493 if (comp->llc_pool_name[0] != '\0')
2494 strncpy(new->llc_pool_name, comp->llc_pool_name,
2495 sizeof(new->llc_pool_name));
/* Copy the per-stripe OST indices, resizing the array through
 * __llapi_comp_objects_realloc() on every iteration. */
2497 for (i = 0; i < comp->llc_objects_count; i++) {
2498 if (__llapi_comp_objects_realloc(new,
2499 stripe_number_roundup(i)) < 0) {
2501 __llapi_comp_free(new);
2504 new->llc_objects[i].l_ost_idx = \
2505 comp->llc_objects[i].l_ost_idx;
/* Overwrite the object count with the source's count, then copy the
 * composite-only fields. */
2508 new->llc_objects_count = comp->llc_objects_count;
2509 new->llc_extent.e_start = comp->llc_extent.e_start;
2510 new->llc_extent.e_end = comp->llc_extent.e_end;
2511 new->llc_id = comp->llc_id;
2512 new->llc_flags = comp->llc_flags;
2514 list_add_tail(&new->llc_list, &new_layout->llot_comp_list);
2515 new_layout->llot_cur_comp = new;
2517 new_layout->llot_is_composite = true;
2519 *dst_layout = new_layout;
2522 llapi_layout_free(new_layout);
2527 * Find all stale components.
2529 * \param[in] layout component layout list.
2530 * \param[out] comp array of stale component info.
2531 * \param[in] comp_size array size of @comp.
2532 * \param[in] mirror_ids array of mirror id that only components
2533 * belonging to these mirror will be collected.
2534 * \param[in] ids_nr number of mirror ids array.
2536 * \retval number of component info collected on success or
2537 * an error code on failure.
/*
 * Walk the components of \a layout from the first one and record, for each
 * component carrying LCME_FL_STALE that passes the mirror filter, its
 * component ID, mirror ID and extent into the \a comp array.
 */
2539 int llapi_mirror_find_stale(struct llapi_layout *layout,
2540 struct llapi_resync_comp *comp, size_t comp_size,
2541 __u16 *mirror_ids, int ids_nr)
2546 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2554 uint64_t start, end;
2556 rc = llapi_layout_comp_flags_get(layout, &flags);
/* The LCME_FL_STALE bit decides how a component is treated. */
2560 if (!(flags & LCME_FL_STALE))
2563 rc = llapi_layout_mirror_id_get(layout, &mirror_id);
2567 /* the caller only wants stale components from specific
2572 for (j = 0; j < ids_nr; j++) {
2573 if (mirror_ids[j] == mirror_id)
2577 /* not in the specified mirror */
2580 } else if (flags & LCME_FL_NOSYNC) {
2581 /* if not specified mirrors, do not resync "nosync"
2586 rc = llapi_layout_comp_id_get(layout, &id);
2590 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2594 /* pack this component into @comp array */
2595 comp[idx].lrc_id = id;
2596 comp[idx].lrc_mirror_id = mirror_id;
2597 comp[idx].lrc_start = start;
2598 comp[idx].lrc_end = end;
/* idx is compared against the capacity of the @comp array. */
2601 if (idx >= comp_size) {
2607 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
/* A negative rc takes precedence over the number of entries collected. */
2614 return rc < 0 ? rc : idx;
2617 /* locate @layout to a valid component covering file [file_start, file_end) */
/*
 * Walk the components of \a layout from the first one, examining those
 * that do not carry LCME_FL_STALE, and track through *endp how far from
 * \a file_start the examined extents reach.
 */
2618 uint32_t llapi_mirror_find(struct llapi_layout *layout,
2619 uint64_t file_start, uint64_t file_end,
2622 uint32_t mirror_id = 0;
2625 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2631 uint64_t start, end;
2632 uint32_t flags, id, rid;
2634 rc = llapi_layout_comp_flags_get(layout, &flags);
/* The LCME_FL_STALE bit decides how a component is treated. */
2638 if (flags & LCME_FL_STALE)
2641 rc = llapi_layout_mirror_id_get(layout, &rid);
2645 rc = llapi_layout_comp_id_get(layout, &id);
2649 rc = llapi_layout_comp_extent_get(layout, &start, &end);
/* Only a component whose extent contains file_start is considered. */
2653 if (file_start >= start && file_start < end) {
2656 else if (mirror_id != rid || *endp != start)
/* Advance the search position to the end of this component. */
2659 file_start = *endp = end;
2660 if (end >= file_end)
2665 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2673 int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
2674 struct llapi_resync_comp *comp_array,
2675 int comp_size, uint64_t start, uint64_t end)
2678 size_t page_size = sysconf(_SC_PAGESIZE);
2679 const size_t buflen = 4 << 20; /* 4M */
2681 uint64_t pos = start;
2685 rc = posix_memalign(&buf, page_size, buflen);
2689 if (end == OBD_OBJECT_EOF)
2690 count = OBD_OBJECT_EOF;
2692 count = end - start;
2696 uint64_t mirror_end = 0;
2697 uint64_t bytes_left;
2702 src = llapi_mirror_find(layout, pos, end, &mirror_end);
2706 if (mirror_end == OBD_OBJECT_EOF) {
2709 bytes_left = MIN(count, mirror_end - pos);
2710 bytes_left = ((bytes_left - 1) | (page_size - 1)) + 1;
2712 to_read = MIN(buflen, bytes_left);
2714 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
2715 if (bytes_read == 0) {
2719 if (bytes_read < 0) {
2724 /* round up to page align to make direct IO happy. */
2725 to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
2727 for (i = 0; i < comp_size; i++) {
2730 size_t to_write2 = to_write;
2732 /* skip non-overlapped component */
2733 if (pos >= comp_array[i].lrc_end ||
2734 pos + to_write <= comp_array[i].lrc_start)
2737 if (pos < comp_array[i].lrc_start)
2738 pos2 = comp_array[i].lrc_start;
2740 to_write2 -= pos2 - pos;
2742 if ((pos + to_write) > comp_array[i].lrc_end)
2743 to_write2 -= pos + to_write -
2744 comp_array[i].lrc_end;
2746 written = llapi_mirror_write(fd,
2747 comp_array[i].lrc_mirror_id,
2752 * this component is not written successfully,
2753 * mark it using its lrc_synced, it is supposed
2754 * to be false before getting here.
2756 * And before this function returns, all
2757 * elements of comp_array will reverse their
2758 * lrc_synced flag to reflect their true
2761 comp_array[i].lrc_synced = true;
2764 assert(written == to_write2);
2768 count -= bytes_read;
2774 for (i = 0; i < comp_size; i++)
2775 comp_array[i].lrc_synced = false;
2779 for (i = 0; i < comp_size; i++) {
2780 comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
2781 if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
2782 rc = llapi_mirror_truncate(fd,
2783 comp_array[i].lrc_mirror_id, pos);
2785 comp_array[i].lrc_synced = false;
2789 /* partially successful is successful */