4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the GNU Lesser General Public License
8 * (LGPL) version 2.1 or (at your discretion) any later version.
9 * (LGPL) version 2.1 accompanies this distribution, and is available at
10 * http://www.gnu.org/licenses/lgpl-2.1.html
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
20 * lustre/utils/liblustreapi_layout.c
22 * lustreapi library for layout calls for interacting with the layout of
23 * Lustre files while hiding details of the internal data structures
26 * Copyright (c) 2016, 2017, Intel Corporation.
28 * Author: Ned Bass <bass6@llnl.gov>
38 #include <sys/xattr.h>
39 #include <sys/param.h>
41 #include <libcfs/util/list.h>
42 #include <lustre/lustreapi.h>
43 #include "lustreapi_internal.h"
46 * Layout component, which contains all attributes of a plain
49 struct llapi_layout_comp {
51 uint64_t llc_stripe_size;
52 uint64_t llc_stripe_count;
53 uint64_t llc_stripe_offset;
54 /* Add 1 so user always gets back a null terminated string. */
55 char llc_pool_name[LOV_MAXPOOLNAME + 1];
56 /** Number of objects in llc_objects array if was initialized. */
57 uint32_t llc_objects_count;
58 struct lov_user_ost_data_v1 *llc_objects;
59 /* fields used only for composite layouts */
60 struct lu_extent llc_extent; /* [start, end) of component */
61 uint32_t llc_id; /* unique ID of component */
62 uint32_t llc_flags; /* LCME_FL_* flags */
63 uint64_t llc_timestamp; /* snapshot timestamp */
64 struct list_head llc_list; /* linked to the llapi_layout
70 * An Opaque data type abstracting the layout of a Lustre file.
73 uint32_t llot_magic; /* LLAPI_LAYOUT_MAGIC */
76 bool llot_is_composite;
77 uint16_t llot_mirror_count;
78 /* Cursor pointing to one of the components in llot_comp_list */
79 struct llapi_layout_comp *llot_cur_comp;
80 struct list_head llot_comp_list;
84 * Compute the number of elements in the lmm_objects array of \a lum
85 * with size \a lum_size.
87 * \param[in] lum the struct lov_user_md to check
88 * \param[in] lum_size the number of bytes in \a lum
90 * \retval number of elements in array lum->lmm_objects
92 static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
97 if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
100 if (lum->lmm_magic == __swab32(LOV_MAGIC_V1) ||
101 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
102 magic = __swab32(lum->lmm_magic);
104 magic = lum->lmm_magic;
106 base_size = lov_user_md_size(0, magic);
108 if (lum_size <= base_size)
111 return (lum_size - base_size) / sizeof(lum->lmm_objects[0]);
115 * Byte-swap the fields of struct lov_user_md.
117 * XXX Rather than duplicating swabbing code here, we should eventually
118 * refactor the needed functions in lustre/ptlrpc/pack_generic.c
119 * into a library that can be shared between kernel and user code.
122 llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
124 int i, j, ent_count, obj_count;
125 struct lov_comp_md_v1 *comp_v1 = NULL;
126 struct lov_comp_md_entry_v1 *ent;
127 struct lov_user_ost_data *lod;
129 if (lum->lmm_magic != __swab32(LOV_MAGIC_V1) &&
130 lum->lmm_magic != __swab32(LOV_MAGIC_V3) &&
131 lum->lmm_magic != __swab32(LOV_MAGIC_COMP_V1))
134 if (lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
135 comp_v1 = (struct lov_comp_md_v1 *)lum;
137 if (comp_v1 != NULL) {
138 comp_v1->lcm_magic = __swab32(comp_v1->lcm_magic);
139 comp_v1->lcm_size = __swab32(comp_v1->lcm_size);
140 comp_v1->lcm_layout_gen = __swab32(comp_v1->lcm_layout_gen);
141 comp_v1->lcm_flags = __swab16(comp_v1->lcm_flags);
142 comp_v1->lcm_entry_count = __swab16(comp_v1->lcm_entry_count);
143 ent_count = comp_v1->lcm_entry_count;
148 for (i = 0; i < ent_count; i++) {
149 if (comp_v1 != NULL) {
150 ent = &comp_v1->lcm_entries[i];
151 ent->lcme_id = __swab32(ent->lcme_id);
152 ent->lcme_flags = __swab32(ent->lcme_flags);
153 ent->lcme_timestamp = __swab64(ent->lcme_timestamp);
154 ent->lcme_extent.e_start = __swab64(ent->lcme_extent.e_start);
155 ent->lcme_extent.e_end = __swab64(ent->lcme_extent.e_end);
156 ent->lcme_offset = __swab32(ent->lcme_offset);
157 ent->lcme_size = __swab32(ent->lcme_size);
159 lum = (struct lov_user_md *)((char *)comp_v1 +
161 lum_size = ent->lcme_size;
163 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
165 lum->lmm_magic = __swab32(lum->lmm_magic);
166 lum->lmm_pattern = __swab32(lum->lmm_pattern);
167 lum->lmm_stripe_size = __swab32(lum->lmm_stripe_size);
168 lum->lmm_stripe_count = __swab16(lum->lmm_stripe_count);
169 lum->lmm_stripe_offset = __swab16(lum->lmm_stripe_offset);
171 if (lum->lmm_magic != LOV_MAGIC_V1) {
172 struct lov_user_md_v3 *v3;
173 v3 = (struct lov_user_md_v3 *)lum;
174 lod = v3->lmm_objects;
176 lod = lum->lmm_objects;
179 for (j = 0; j < obj_count; j++)
180 lod[j].l_ost_idx = __swab32(lod[j].l_ost_idx);
185 * (Re-)allocate llc_objects[] to \a num_stripes stripes.
187 * Copy over existing llc_objects[], if any, to the new llc_objects[].
189 * \param[in] layout existing layout to be modified
190 * \param[in] num_stripes number of stripes in new layout
192 * \retval 0 if the objects are re-allocated successfully
193 * \retval -1 on error with errno set
195 static int __llapi_comp_objects_realloc(struct llapi_layout_comp *comp,
196 unsigned int new_stripes)
198 struct lov_user_ost_data_v1 *new_objects;
201 if (new_stripes > LOV_MAX_STRIPE_COUNT) {
206 if (new_stripes == comp->llc_objects_count)
209 if (new_stripes != 0 && new_stripes <= comp->llc_objects_count)
212 new_objects = realloc(comp->llc_objects,
213 sizeof(*new_objects) * new_stripes);
214 if (new_objects == NULL && new_stripes != 0) {
219 for (i = comp->llc_objects_count; i < new_stripes; i++)
220 new_objects[i].l_ost_idx = LLAPI_LAYOUT_IDX_MAX;
222 comp->llc_objects = new_objects;
223 comp->llc_objects_count = new_stripes;
229 * Allocate storage for a llapi_layout_comp with \a num_stripes stripes.
231 * \param[in] num_stripes number of stripes in new layout
233 * \retval valid pointer if allocation succeeds
234 * \retval NULL if allocation fails
236 static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
238 struct llapi_layout_comp *comp;
240 if (num_stripes > LOV_MAX_STRIPE_COUNT) {
245 comp = calloc(1, sizeof(*comp));
251 comp->llc_objects = NULL;
252 comp->llc_objects_count = 0;
254 if (__llapi_comp_objects_realloc(comp, num_stripes) < 0) {
260 comp->llc_pattern = LLAPI_LAYOUT_DEFAULT;
261 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
262 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
263 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
264 comp->llc_pool_name[0] = '\0';
265 comp->llc_extent.e_start = 0;
266 comp->llc_extent.e_end = LUSTRE_EOF;
269 INIT_LIST_HEAD(&comp->llc_list);
275 * Free memory allocated for \a comp
277 * \param[in] comp previously allocated by __llapi_comp_alloc()
279 static void __llapi_comp_free(struct llapi_layout_comp *comp)
281 if (comp->llc_objects != NULL)
282 free(comp->llc_objects);
287 * Free memory allocated for \a layout.
289 * \param[in] layout previously allocated by llapi_layout_alloc()
291 void llapi_layout_free(struct llapi_layout *layout)
293 struct llapi_layout_comp *comp, *n;
298 list_for_each_entry_safe(comp, n, &layout->llot_comp_list, llc_list) {
299 list_del_init(&comp->llc_list);
300 __llapi_comp_free(comp);
306 * Allocate and initialize a llapi_layout structure.
308 * \retval valid llapi_layout pointer on success
309 * \retval NULL if memory allocation fails
311 static struct llapi_layout *__llapi_layout_alloc(void)
313 struct llapi_layout *layout;
315 layout = calloc(1, sizeof(*layout));
316 if (layout == NULL) {
322 layout->llot_magic = LLAPI_LAYOUT_MAGIC;
323 layout->llot_gen = 0;
324 layout->llot_flags = 0;
325 layout->llot_is_composite = false;
326 layout->llot_mirror_count = 1;
327 layout->llot_cur_comp = NULL;
328 INIT_LIST_HEAD(&layout->llot_comp_list);
334 * Allocate and initialize a new plain layout.
336 * \retval valid llapi_layout pointer on success
337 * \retval NULL if memory allocation fails
339 struct llapi_layout *llapi_layout_alloc(void)
341 struct llapi_layout_comp *comp;
342 struct llapi_layout *layout;
344 layout = __llapi_layout_alloc();
348 comp = __llapi_comp_alloc(0);
354 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
355 layout->llot_cur_comp = comp;
361 * Check if the given \a lum_size is large enough to hold the required
364 * \param[in] lum the struct lov_user_md to check
365 * \param[in] lum_size the number of bytes in \a lum
367 * \retval true the \a lum_size is too small
368 * \retval false the \a lum_size is large enough
370 static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
374 if (lum_size < sizeof(lum->lmm_magic))
377 if (lum->lmm_magic == LOV_MAGIC_V1 ||
378 lum->lmm_magic == __swab32(LOV_MAGIC_V1))
379 magic = LOV_MAGIC_V1;
380 else if (lum->lmm_magic == LOV_MAGIC_V3 ||
381 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
382 magic = LOV_MAGIC_V3;
383 else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
384 lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
385 magic = LOV_MAGIC_COMP_V1;
389 if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
390 return lum_size < lov_user_md_size(0, magic);
392 return lum_size < sizeof(struct lov_comp_md_v1);
395 /* Verify if the objects count in lum is consistent with the
396 * stripe count in lum. It applies to regular file only. */
397 static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
399 struct lov_comp_md_v1 *comp_v1 = NULL;
400 int i, ent_count, obj_count;
402 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
403 comp_v1 = (struct lov_comp_md_v1 *)lum;
404 ent_count = comp_v1->lcm_entry_count;
405 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
406 lum->lmm_magic == LOV_MAGIC_V3) {
412 for (i = 0; i < ent_count; i++) {
414 lum = (struct lov_user_md *)((char *)comp_v1 +
415 comp_v1->lcm_entries[i].lcme_offset);
416 lum_size = comp_v1->lcm_entries[i].lcme_size;
418 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
421 if (!(comp_v1->lcm_entries[i].lcme_flags &
422 LCME_FL_INIT) && obj_count != 0)
424 } else if (obj_count != lum->lmm_stripe_count) {
432 * Convert the data from a lov_user_md to a newly allocated llapi_layout.
433 * The caller is responsible for freeing the returned pointer.
435 * \param[in] lov_xattr LOV user metadata xattr to copy data from
436 * \param[in] lov_xattr_size size the lov_xattr_size passed in
437 * \param[in] flags flags to control how layout is retrieved
439 * \retval valid llapi_layout pointer on success
440 * \retval NULL if memory allocation fails
/* Builds one llapi_layout_comp per component entry of the xattr (or per plain V1/V3 blob) and links it onto the new layout. */
442 struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
443 ssize_t lov_xattr_size,
444 enum llapi_layout_get_flags flags)
446 struct lov_user_md *lum = lov_xattr;
447 struct lov_comp_md_v1 *comp_v1 = NULL;
448 struct lov_comp_md_entry_v1 *ent;
449 struct lov_user_md *v1;
450 struct llapi_layout *layout = NULL;
451 struct llapi_layout_comp *comp;
452 int i, ent_count = 0, obj_count;
/* A missing buffer or a non-positive size is rejected before anything is read. */
454 if (lov_xattr == NULL || lov_xattr_size <= 0) {
459 /* Return an error if we got back a partial layout. */
460 if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
/* Big-endian builds only: LLAPI_LAYOUT_GET_COPY makes lum a private copy, presumably so the swab below leaves the caller's buffer untouched. */
465 #if __BYTE_ORDER == __BIG_ENDIAN
466 if (flags & LLAPI_LAYOUT_GET_COPY) {
467 lum = malloc(lov_xattr_size);
472 memcpy(lum, lov_xattr, lov_xattr_size);
/* The swab helper returns without changes unless the magic is in foreign byte order. */
476 llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
/* Compatibility shim: translate the old flag bit 0x0001 into LLAPI_LAYOUT_GET_CHECK. */
478 #if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 16, 53, 0)
479 #define LLAPI_LXF_CHECK_OLD 0x0001
480 if (flags & LLAPI_LXF_CHECK_OLD)
481 flags = (flags & ~LLAPI_LXF_CHECK_OLD) | LLAPI_LAYOUT_GET_CHECK;
/* Optional consistency check of object counts against stripe counts. */
483 if ((flags & LLAPI_LAYOUT_GET_CHECK) &&
484 !llapi_layout_lum_valid(lum, lov_xattr_size)) {
489 layout = __llapi_layout_alloc();
490 if (layout == NULL) {
/* Composite xattr: take entry count, generation, mirror count and flags from the header. */
495 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
496 comp_v1 = (struct lov_comp_md_v1 *)lum;
497 ent_count = comp_v1->lcm_entry_count;
/* NOTE(review): llot_gen is assigned here and again three lines below from the same field; one of the two is redundant. */
498 layout->llot_gen = comp_v1->lcm_layout_gen;
499 layout->llot_is_composite = true;
/* The stored mirror count is one less than the count kept in the layout. */
500 layout->llot_mirror_count = comp_v1->lcm_mirror_count + 1;
501 layout->llot_gen = comp_v1->lcm_layout_gen;
502 layout->llot_flags = comp_v1->lcm_flags;
503 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
504 lum->lmm_magic == LOV_MAGIC_V3) {
506 layout->llot_is_composite = false;
/* NOTE(review): lov_xattr_size <= 0 was already rejected on entry, so this test looks unreachable — confirm. */
508 if (lov_xattr_size <= 0) {
517 if (ent_count == 0) {
/* v1 starts at the whole buffer and is re-pointed at each component blob when the layout is composite. */
522 v1 = (struct lov_user_md *)lum;
523 for (i = 0; i < ent_count; i++) {
524 if (comp_v1 != NULL) {
525 ent = &comp_v1->lcm_entries[i];
526 v1 = (struct lov_user_md *)((char *)comp_v1 +
528 lov_xattr_size = ent->lcme_size;
/* Size the new component's object array from the bytes that follow the blob header. */
533 obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
534 comp = __llapi_comp_alloc(obj_count);
539 comp->llc_extent.e_start = ent->lcme_extent.e_start;
540 comp->llc_extent.e_end = ent->lcme_extent.e_end;
541 comp->llc_id = ent->lcme_id;
542 comp->llc_flags = ent->lcme_flags;
/* The timestamp is copied only for components flagged LCME_FL_NOSYNC. */
543 if (comp->llc_flags & LCME_FL_NOSYNC)
544 comp->llc_timestamp = ent->lcme_timestamp;
546 comp->llc_extent.e_start = 0;
547 comp->llc_extent.e_end = LUSTRE_EOF;
/* Map the LOV pattern onto the LLAPI_LAYOUT_* pattern values. */
552 if (v1->lmm_pattern == LOV_PATTERN_RAID0)
553 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
554 else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
555 LOV_PATTERN_OVERSTRIPING))
556 comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING;
557 else if (v1->lmm_pattern == LOV_PATTERN_MDT)
558 comp->llc_pattern = LLAPI_LAYOUT_MDT;
560 /* Lustre only supports RAID0, overstripping
563 comp->llc_pattern = v1->lmm_pattern;
565 if (v1->lmm_stripe_size == 0)
566 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
568 comp->llc_stripe_size = v1->lmm_stripe_size;
570 if (v1->lmm_stripe_count == (typeof(v1->lmm_stripe_count))-1)
571 comp->llc_stripe_count = LLAPI_LAYOUT_WIDE;
572 else if (v1->lmm_stripe_count == 0)
573 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
575 comp->llc_stripe_count = v1->lmm_stripe_count;
577 if (v1->lmm_stripe_offset ==
578 (typeof(v1->lmm_stripe_offset))-1)
579 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
581 comp->llc_stripe_offset = v1->lmm_stripe_offset;
583 if (v1->lmm_magic != LOV_USER_MAGIC_V1) {
584 const struct lov_user_md_v3 *lumv3;
585 lumv3 = (struct lov_user_md_v3 *)v1;
586 snprintf(comp->llc_pool_name,
587 sizeof(comp->llc_pool_name),
588 "%s", lumv3->lmm_pool_name);
589 memcpy(comp->llc_objects, lumv3->lmm_objects,
590 obj_count * sizeof(lumv3->lmm_objects[0]));
592 const struct lov_user_md_v1 *lumv1;
593 lumv1 = (struct lov_user_md_v1 *)v1;
594 memcpy(comp->llc_objects, lumv1->lmm_objects,
595 obj_count * sizeof(lumv1->lmm_objects[0]));
599 comp->llc_stripe_offset =
600 comp->llc_objects[0].l_ost_idx;
602 comp->llc_ondisk = true;
603 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
604 layout->llot_cur_comp = comp;
608 if (lum != lov_xattr)
612 llapi_layout_free(layout);
617 __u32 llapi_pattern_to_lov(uint64_t pattern)
622 case LLAPI_LAYOUT_DEFAULT:
623 lov_pattern = LOV_PATTERN_RAID0;
625 case LLAPI_LAYOUT_RAID0:
626 lov_pattern = LOV_PATTERN_RAID0;
628 case LLAPI_LAYOUT_MDT:
629 lov_pattern = LOV_PATTERN_MDT;
631 case LLAPI_LAYOUT_OVERSTRIPING:
632 lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
635 lov_pattern = EINVAL;
642 * Convert the data from a llapi_layout to a newly allocated lov_user_md.
643 * The caller is responsible for freeing the returned pointer.
645 * \param[in] layout the layout to copy from
647 * \retval valid lov_user_md pointer on success
648 * \retval NULL if memory allocation fails or the layout is invalid
/* Serialises every component on the layout's list into one growing buffer; composite layouts get a lov_comp_md_v1 header with one entry per component in front of the blobs. */
650 static struct lov_user_md *
651 llapi_layout_to_lum(const struct llapi_layout *layout)
653 struct llapi_layout_comp *comp;
654 struct lov_comp_md_v1 *comp_v1 = NULL;
655 struct lov_comp_md_entry_v1 *ent;
656 struct lov_user_md *lum = NULL;
/* The cast drops const only because list_empty() takes a non-const list head here. */
661 if (layout == NULL ||
662 list_empty((struct list_head *)&layout->llot_comp_list)) {
667 /* Allocate header of lov_comp_md_v1 if necessary */
668 if (layout->llot_is_composite) {
/* First pass over the list: the entry table size depends on the number of components. */
671 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
674 lum_size = sizeof(*comp_v1) + comp_cnt * sizeof(*ent);
/* Zero-filled header; note the (size, 1) argument order to calloc(). */
675 lum = calloc(lum_size, 1);
680 comp_v1 = (struct lov_comp_md_v1 *)lum;
681 comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
682 comp_v1->lcm_size = lum_size;
683 comp_v1->lcm_layout_gen = 0;
684 comp_v1->lcm_flags = layout->llot_flags;
685 comp_v1->lcm_entry_count = comp_cnt;
/* Inverse of the "+ 1" applied when a layout is read from an xattr. */
686 comp_v1->lcm_mirror_count = layout->llot_mirror_count - 1;
/* Second pass: append one lov_user_md blob per component. */
690 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
691 struct lov_user_md *blob;
694 int i, obj_count = 0;
695 struct lov_user_ost_data *lmm_objects;
696 uint64_t pattern = comp->llc_pattern;
/* Magic selection: explicit OST list -> SPECIFIC, otherwise a pool name -> V3, otherwise V1. */
698 if ((pattern & LLAPI_LAYOUT_SPECIFIC) != 0) {
699 if (comp->llc_objects_count <
700 comp->llc_stripe_count) {
704 magic = LOV_USER_MAGIC_SPECIFIC;
705 obj_count = comp->llc_stripe_count;
/* The SPECIFIC bit is an llapi-side marker and is stripped before the pattern is translated. */
706 pattern &= ~LLAPI_LAYOUT_SPECIFIC;
707 } else if (strlen(comp->llc_pool_name) != 0) {
708 magic = LOV_USER_MAGIC_V3;
710 magic = LOV_USER_MAGIC_V1;
712 /* All stripes must be specified when the pattern contains
713 * LLAPI_LAYOUT_SPECIFIC */
714 for (i = 0; i < obj_count; i++) {
715 if (comp->llc_objects[i].l_ost_idx ==
716 LLAPI_LAYOUT_IDX_MAX) {
722 blob_size = lov_user_md_size(obj_count, magic);
/* The realloc() result goes into blob first, so lum stays valid if the call fails. */
723 blob = realloc(lum, lum_size + blob_size);
/* realloc() may have moved the buffer: refresh comp_v1 and point blob at the newly appended region. */
729 comp_v1 = (struct lov_comp_md_v1 *)lum;
730 blob = (struct lov_user_md *)((char *)lum + lum_size);
731 lum_size += blob_size;
734 blob->lmm_magic = magic;
735 blob->lmm_pattern = llapi_pattern_to_lov(pattern);
/* llapi_pattern_to_lov() reports an unknown pattern by returning EINVAL as a value. */
736 if (blob->lmm_pattern == EINVAL) {
/* The LLAPI_LAYOUT_* sentinels are mapped to 0, LOV_ALL_STRIPES and -1 below. */
741 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
742 blob->lmm_stripe_size = 0;
744 blob->lmm_stripe_size = comp->llc_stripe_size;
746 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
747 blob->lmm_stripe_count = 0;
748 else if (comp->llc_stripe_count == LLAPI_LAYOUT_WIDE)
749 blob->lmm_stripe_count = LOV_ALL_STRIPES;
751 blob->lmm_stripe_count = comp->llc_stripe_count;
753 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
754 blob->lmm_stripe_offset = -1;
756 blob->lmm_stripe_offset = comp->llc_stripe_offset;
758 if (magic == LOV_USER_MAGIC_V3 ||
759 magic == LOV_USER_MAGIC_SPECIFIC) {
760 struct lov_user_md_v3 *lumv3 =
761 (struct lov_user_md_v3 *)blob;
/* NOTE(review): strncpy() bounded by the destination size leaves lmm_pool_name unterminated when the name fills it — confirm consumers treat the field as fixed-width. */
763 if (comp->llc_pool_name[0] != '\0') {
764 strncpy(lumv3->lmm_pool_name,
766 sizeof(lumv3->lmm_pool_name));
768 memset(lumv3->lmm_pool_name, 0,
769 sizeof(lumv3->lmm_pool_name));
771 lmm_objects = lumv3->lmm_objects;
773 lmm_objects = blob->lmm_objects;
/* obj_count is non-zero only for SPECIFIC layouts, so OST indices are copied only then. */
776 for (i = 0; i < obj_count; i++)
777 lmm_objects[i].l_ost_idx =
778 comp->llc_objects[i].l_ost_idx;
/* Composite only: fill the entry that describes the blob just written. */
780 if (layout->llot_is_composite) {
781 ent = &comp_v1->lcm_entries[ent_idx];
782 ent->lcme_id = comp->llc_id;
783 ent->lcme_flags = comp->llc_flags;
784 if (ent->lcme_flags & LCME_FL_NOSYNC)
785 ent->lcme_timestamp = comp->llc_timestamp;
786 ent->lcme_extent.e_start = comp->llc_extent.e_start;
787 ent->lcme_extent.e_end = comp->llc_extent.e_end;
788 ent->lcme_size = blob_size;
789 ent->lcme_offset = offset;
/* Keep the header's total size in step with the appended blob. */
791 comp_v1->lcm_size += blob_size;
/**
 * Get the parent directory of a path.
 *
 * \a path is copied into \a buf (truncated to fit) and cut at its last
 * '/'.  A path without any '/' yields ".", and a path whose only '/' is
 * the leading one yields "/".  Whenever \a size is non-zero the result
 * is NUL-terminated; if \a size is too small to hold "." the result is
 * the empty string.
 *
 * \param[in] path	path to get parent of
 * \param[out] buf	buffer in which to store parent path
 * \param[in] size	size in bytes of buffer \a buf
 */
static void get_parent_dir(const char *path, char *buf, size_t size)
{
	size_t len;
	char *slash;

	/* Nothing can be stored, and size - 1 below would wrap around. */
	if (buf == NULL || size == 0)
		return;

	/* Bounded copy that always terminates, unlike strncpy(). */
	len = strlen(path);
	if (len > size - 1)
		len = size - 1;
	memcpy(buf, path, len);
	buf[len] = '\0';

	slash = strrchr(buf, '/');
	if (slash == buf) {
		/* "/name": the parent is the root directory itself. */
		buf[1] = '\0';
	} else if (slash != NULL) {
		*slash = '\0';
	} else if (size >= 2) {
		buf[0] = '.';
		buf[1] = '\0';
	} else {
		buf[0] = '\0';
	}
}
827 * Substitute unspecified attribute values in \a layout with values
828 * from fs global settings. (lov.stripesize, lov.stripecount,
831 * \param[in] layout layout to inherit values from
832 * \param[in] path file path of the filesystem
834 static void inherit_sys_attributes(struct llapi_layout *layout,
837 struct llapi_layout_comp *comp;
838 unsigned int ssize, scount, soffset;
841 rc = sattr_cache_get_defaults(NULL, path, &scount, &ssize, &soffset);
845 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
846 if (comp->llc_pattern == LLAPI_LAYOUT_DEFAULT)
847 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
848 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
849 comp->llc_stripe_size = ssize;
850 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
851 comp->llc_stripe_count = scount;
852 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
853 comp->llc_stripe_offset = soffset;
858 * Get the current component of \a layout.
860 * \param[in] layout layout to get current component
862 * \retval valid llapi_layout_comp pointer on success
863 * \retval NULL on error
865 static struct llapi_layout_comp *
866 __llapi_layout_cur_comp(const struct llapi_layout *layout)
868 struct llapi_layout_comp *comp;
870 if (layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
874 if (layout->llot_cur_comp == NULL) {
878 /* Verify data consistency */
879 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
880 if (comp == layout->llot_cur_comp)
887 * Test if any attributes of \a layout are specified.
889 * \param[in] layout the layout to check
891 * \retval true any attributes are specified
892 * \retval false all attributes are unspecified
894 static bool is_any_specified(const struct llapi_layout *layout)
896 struct llapi_layout_comp *comp;
898 comp = __llapi_layout_cur_comp(layout);
902 if (layout->llot_is_composite || layout->llot_mirror_count != 1)
905 return comp->llc_pattern != LLAPI_LAYOUT_DEFAULT ||
906 comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT ||
907 comp->llc_stripe_count != LLAPI_LAYOUT_DEFAULT ||
908 comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT ||
909 strlen(comp->llc_pool_name);
913 * Get the striping layout for the file referenced by file descriptor \a fd.
915 * If the filesystem does not support the "lustre." xattr namespace, the
916 * file must be on a non-Lustre filesystem, so set errno to ENOTTY per
917 * convention. If the file has no "lustre.lov" data, the file will
918 * inherit default values, so return a default layout.
920 * If the kernel gives us back less than the expected amount of data,
921 * we fail with errno set to EINTR.
923 * \param[in] fd open file descriptor
924 * \param[in] flags open file descriptor
926 * \retval valid llapi_layout pointer on success
927 * \retval NULL if an error occurs
929 struct llapi_layout *llapi_layout_get_by_fd(int fd,
930 enum llapi_layout_get_flags flags)
933 struct lov_user_md *lum;
934 struct llapi_layout *layout = NULL;
938 lum_len = XATTR_SIZE_MAX;
939 lum = malloc(lum_len);
943 bytes_read = fgetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_len);
944 if (bytes_read < 0) {
945 if (errno == EOPNOTSUPP)
947 else if (errno == ENODATA)
948 layout = llapi_layout_alloc();
952 /* Directories may have a positive non-zero lum->lmm_stripe_count
953 * yet have an empty lum->lmm_objects array. For non-directories the
954 * amount of data returned from the kernel must be consistent
955 * with the stripe count. */
956 if (fstat(fd, &st) < 0)
959 layout = llapi_layout_get_by_xattr(lum, bytes_read,
960 S_ISDIR(st.st_mode) ? 0 : LLAPI_LAYOUT_GET_CHECK);
967 * Get the expected striping layout for a file at \a path.
969 * Substitute expected inherited attribute values for unspecified
970 * attributes. Unspecified attributes may belong to directories and
971 * never-written-to files, and indicate that default values will be
972 * assigned when files are created or first written to. A default value
973 * is inherited from the parent directory if the attribute is specified
974 * there, otherwise it is inherited from the filesystem root.
975 * Unspecified attributes normally have the value LLAPI_LAYOUT_DEFAULT.
977 * The complete \a path need not refer to an existing file or directory,
978 * but some leading portion of it must reside within a lustre filesystem.
979 * A use case for this interface would be to obtain the literal striping
980 * values that would be assigned to a new file in a given directory.
982 * \param[in] path path for which to get the expected layout
984 * \retval valid llapi_layout pointer on success
985 * \retval NULL if an error occurs
/* Three candidate donors are tried in order: the path itself, its parent directory, and the filesystem root found through llapi_search_mounts(). */
987 static struct llapi_layout *llapi_layout_expected(const char *path)
989 struct llapi_layout *path_layout = NULL;
990 char donor_path[PATH_MAX];
/* ENOENT from open() is tolerated because the path need not exist. */
995 fd = open(path, O_RDONLY);
996 if (fd < 0 && errno != ENOENT)
1002 path_layout = llapi_layout_get_by_fd(fd, 0);
/* No layout for the path: only ENODATA and ENOENT are accepted, and an all-default layout stands in. */
1008 if (path_layout == NULL) {
1009 if (errno != ENODATA && errno != ENOENT)
1012 path_layout = llapi_layout_alloc();
1013 if (path_layout == NULL)
/* Stage 1: the path's own layout specifies something, so only the remaining gaps are filled from filesystem defaults. */
1017 if (is_any_specified(path_layout)) {
1018 inherit_sys_attributes(path_layout, path);
/* Otherwise the all-default layout is discarded before the next donor is tried. */
1022 llapi_layout_free(path_layout);
1024 rc = stat(path, &st);
1025 if (rc < 0 && errno != ENOENT)
1028 /* If path is a not a directory or doesn't exist, inherit layout
1029 * from parent directory. */
1030 if ((rc == 0 && !S_ISDIR(st.st_mode)) ||
1031 (rc < 0 && errno == ENOENT)) {
/* Stage 2: the parent directory acts as the donor. */
1032 get_parent_dir(path, donor_path, sizeof(donor_path));
1033 path_layout = llapi_layout_get_by_path(donor_path, 0);
1034 if (path_layout != NULL) {
1035 if (is_any_specified(path_layout)) {
1036 inherit_sys_attributes(path_layout, donor_path);
1039 llapi_layout_free(path_layout);
1043 /* Inherit layout from the filesystem root. */
/* Stage 3: donor_path is reused to receive the mount point. */
1044 rc = llapi_search_mounts(path, 0, donor_path, NULL);
1047 path_layout = llapi_layout_get_by_path(donor_path, 0);
1048 if (path_layout == NULL)
/* At the root, defaults are substituted without testing is_any_specified(). */
1051 inherit_sys_attributes(path_layout, donor_path);
1056 * Get the striping layout for the file at \a path.
1058 * If \a flags contains LLAPI_LAYOUT_GET_EXPECTED, substitute
1059 * expected inherited attribute values for unspecified attributes. See
1060 * llapi_layout_expected().
1062 * \param[in] path path for which to get the layout
1063 * \param[in] flags flags to control how layout is retrieved
1065 * \retval valid llapi_layout pointer on success
1066 * \retval NULL if an error occurs
1068 struct llapi_layout *llapi_layout_get_by_path(const char *path,
1069 enum llapi_layout_get_flags flags)
1071 struct llapi_layout *layout = NULL;
1072 bool failed = false;
1077 if (flags & LLAPI_LAYOUT_GET_EXPECTED)
1078 return llapi_layout_expected(path);
1080 /* Always get layout in O_DIRECT */
1081 /* Allow fetching layout even without the key on encrypted files */
1082 open_flags = O_RDONLY | O_DIRECT | O_FILE_ENC;
1084 fd = open(path, open_flags);
1086 if (errno != EINVAL || failed)
1088 /* EINVAL is because a directory cannot be opened in O_DIRECT */
1089 open_flags = O_RDONLY | O_FILE_ENC;
1094 layout = llapi_layout_get_by_fd(fd, flags);
1103 * Get the layout for the file with FID \a fidstr in filesystem \a lustre_dir.
1105 * \param[in] lustre_dir path within Lustre filesystem containing \a fid
1106 * \param[in] fid Lustre identifier of file to get layout for
1108 * \retval valid llapi_layout pointer on success
1109 * \retval NULL if an error occurs
1111 struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir,
1112 const struct lu_fid *fid,
1113 enum llapi_layout_get_flags flags)
1117 int saved_msg_level = llapi_msg_get_level();
1118 struct llapi_layout *layout = NULL;
1120 /* Prevent llapi internal routines from writing to console
1121 * while executing this function, then restore previous message
1123 llapi_msg_set_level(LLAPI_MSG_OFF);
1124 fd = llapi_open_by_fid(lustre_dir, fid, O_RDONLY);
1125 llapi_msg_set_level(saved_msg_level);
1130 layout = llapi_layout_get_by_fd(fd, flags);
1139 * Get the stripe count of \a layout.
1141 * \param[in] layout layout to get stripe count from
1142 * \param[out] count integer to store stripe count in
1144 * \retval 0 on success
1145 * \retval -1 if arguments are invalid
1147 int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
1150 struct llapi_layout_comp *comp;
1152 comp = __llapi_layout_cur_comp(layout);
1156 if (count == NULL) {
1161 *count = comp->llc_stripe_count;
1167 * The llapi_layout API functions have these extra validity checks since
1168 * they use intuitively named macros to denote special behavior, whereas
1169 * the old API uses 0 and -1.
1172 bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
1174 return stripe_count == LLAPI_LAYOUT_DEFAULT ||
1175 stripe_count == LLAPI_LAYOUT_WIDE ||
1176 (stripe_count != 0 && stripe_count != -1 &&
1177 llapi_stripe_count_is_valid(stripe_count));
/**
 * Check whether \a ext_size is an acceptable extension size.
 *
 * \param[in] ext_size	value to check
 *
 * \retval true		non-zero, aligned and not too big
 * \retval false	otherwise
 */
static bool llapi_layout_extension_size_is_valid(uint64_t ext_size)
{
	if (ext_size == 0)
		return false;

	if (!llapi_stripe_size_is_aligned(ext_size))
		return false;

	return !llapi_stripe_size_is_too_big(ext_size);
}
1187 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
1189 return stripe_size == LLAPI_LAYOUT_DEFAULT ||
1190 (stripe_size != 0 &&
1191 llapi_stripe_size_is_aligned(stripe_size) &&
1192 !llapi_stripe_size_is_too_big(stripe_size));
1195 static bool llapi_layout_stripe_index_is_valid(int64_t stripe_index)
1197 return stripe_index == LLAPI_LAYOUT_DEFAULT ||
1198 (stripe_index >= 0 &&
1199 llapi_stripe_index_is_valid(stripe_index));
1203 * Set the stripe count of \a layout.
1205 * \param[in] layout layout to set stripe count in
1206 * \param[in] count value to be set
1208 * \retval 0 on success
1209 * \retval -1 if arguments are invalid
1211 int llapi_layout_stripe_count_set(struct llapi_layout *layout,
1214 struct llapi_layout_comp *comp;
1216 comp = __llapi_layout_cur_comp(layout);
1220 if (!llapi_layout_stripe_count_is_valid(count)) {
1225 comp->llc_stripe_count = count;
1231 * Get the stripe/extension size of \a layout.
1233 * \param[in] layout layout to get stripe size from
1234 * \param[out] size integer to store stripe size in
1235 * \param[in] extension flag if extenion size is requested
1237 * \retval 0 on success
1238 * \retval -1 if arguments are invalid
1240 static int layout_stripe_size_get(const struct llapi_layout *layout,
1241 uint64_t *size, bool extension)
1243 struct llapi_layout_comp *comp;
1246 comp = __llapi_layout_cur_comp(layout);
1255 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1256 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1261 *size = comp->llc_stripe_size;
1262 if (comp->llc_flags & LCME_FL_EXTENSION)
1263 *size *= SEL_UNIT_SIZE;
/**
 * Get the stripe size of the current component of \a layout.
 *
 * \param[in] layout	layout to get stripe size from
 * \param[out] size	integer to store stripe size in
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid
 */
int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
				 uint64_t *size)
{
	const bool want_extension = false;

	return layout_stripe_size_get(layout, size, want_extension);
}
/**
 * Get the extension size of the current component of \a layout.
 *
 * \param[in] layout	layout to get extension size from
 * \param[out] size	integer to store extension size in
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid
 */
int llapi_layout_extension_size_get(const struct llapi_layout *layout,
				    uint64_t *size)
{
	const bool want_extension = true;

	return layout_stripe_size_get(layout, size, want_extension);
}
1281 * Set the stripe/extension size of \a layout.
1283 * \param[in] layout layout to set stripe size in
1284 * \param[in] size value to be set
1285 * \param[in] extension flag if extenion size is passed
1287 * \retval 0 on success
1288 * \retval -1 if arguments are invalid
1290 static int layout_stripe_size_set(struct llapi_layout *layout,
1291 uint64_t size, bool extension)
1293 struct llapi_layout_comp *comp;
1296 comp = __llapi_layout_cur_comp(layout);
1300 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1301 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1307 size /= SEL_UNIT_SIZE;
1309 if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) ||
1310 (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) {
1315 comp->llc_stripe_size = size;
/**
 * Set the stripe size of the current component of \a layout.
 *
 * \param[in] layout	layout to set stripe size in
 * \param[in] size	value to be set
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid
 */
int llapi_layout_stripe_size_set(struct llapi_layout *layout,
				 uint64_t size)
{
	const bool is_extension = false;

	return layout_stripe_size_set(layout, size, is_extension);
}
/**
 * Set the extension size of the current component of \a layout.
 *
 * \param[in] layout	layout to set extension size in
 * \param[in] size	value to be set
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid
 */
int llapi_layout_extension_size_set(struct llapi_layout *layout,
				    uint64_t size)
{
	const bool is_extension = true;

	return layout_stripe_size_set(layout, size, is_extension);
}
1332 * Get the RAID pattern of \a layout.
1334 * \param[in] layout layout to get pattern from
1335 * \param[out] pattern integer to store pattern in
1337 * \retval 0 on success
1338 * \retval -1 if arguments are invalid
1340 int llapi_layout_pattern_get(const struct llapi_layout *layout,
1343 struct llapi_layout_comp *comp;
1345 comp = __llapi_layout_cur_comp(layout);
1349 if (pattern == NULL) {
1354 *pattern = comp->llc_pattern;
1360 * Set the pattern of \a layout.
1362 * \param[in] layout layout to set pattern in
1363 * \param[in] pattern value to be set
1365 * \retval 0 on success
1366 * \retval -1 if arguments are invalid or RAID pattern
1369 int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
1371 struct llapi_layout_comp *comp;
1373 comp = __llapi_layout_cur_comp(layout);
1377 if (pattern != LLAPI_LAYOUT_DEFAULT &&
1378 pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
1379 && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
1384 comp->llc_pattern = pattern |
1385 (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC);
1390 static inline int stripe_number_roundup(int stripe_number)
1392 unsigned int round_up = (stripe_number + 8) & ~7;
1393 return round_up > LOV_MAX_STRIPE_COUNT ?
1394 LOV_MAX_STRIPE_COUNT : round_up;
1398 * Set the OST index of stripe number \a stripe_number to \a ost_index.
1400 * If only the starting stripe's OST index is specified, then this can use
1401 * the normal LOV_MAGIC_{V1,V3} layout type. If multiple OST indices are
1402 * given, then allocate an array to hold the list of indices and ensure that
1403 * the LOV_USER_MAGIC_SPECIFIC layout is used when creating the file.
1405 * \param[in] layout layout to set OST index in
1406 * \param[in] stripe_number stripe number to set index for
1407 * \param[in] ost_index the index to set
1409 * \retval 0 on success
1410 * \retval -1 if arguments are invalid or an unsupported stripe number
1411 * was specified, error returned in errno
1413 int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
1416 struct llapi_layout_comp *comp;
1418 comp = __llapi_layout_cur_comp(layout);
1422 if (!llapi_layout_stripe_index_is_valid(ost_index)) {
1427 if (stripe_number == 0 && ost_index == LLAPI_LAYOUT_DEFAULT) {
1428 comp->llc_stripe_offset = ost_index;
1429 comp->llc_pattern &= ~LLAPI_LAYOUT_SPECIFIC;
1430 __llapi_comp_objects_realloc(comp, 0);
1431 } else if (stripe_number >= 0 &&
1432 stripe_number < LOV_MAX_STRIPE_COUNT) {
1433 if (ost_index >= LLAPI_LAYOUT_IDX_MAX) {
1438 /* Preallocate a few more stripes to avoid realloc() overhead.*/
1439 if (__llapi_comp_objects_realloc(comp,
1440 stripe_number_roundup(stripe_number)) < 0)
1443 comp->llc_objects[stripe_number].l_ost_idx = ost_index;
1445 if (stripe_number == 0)
1446 comp->llc_stripe_offset = ost_index;
1448 comp->llc_pattern |= LLAPI_LAYOUT_SPECIFIC;
1450 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT ||
1451 comp->llc_stripe_count <= stripe_number)
1452 comp->llc_stripe_count = stripe_number + 1;
1462 * Get the OST index associated with stripe \a stripe_number.
1464 * Stripes are indexed starting from zero.
1466 * \param[in] layout layout to get index from
1467 * \param[in] stripe_number stripe number to get index for
1468 * \param[out] index integer to store index in
1470 * \retval 0 on success
1471 * \retval -1 if arguments are invalid
1473 int llapi_layout_ost_index_get(const struct llapi_layout *layout,
1474 uint64_t stripe_number, uint64_t *index)
1476 struct llapi_layout_comp *comp;
1478 comp = __llapi_layout_cur_comp(layout);
1482 if (index == NULL) {
1487 if (stripe_number >= comp->llc_stripe_count ||
1488 stripe_number >= comp->llc_objects_count) {
1493 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
1494 *index = LLAPI_LAYOUT_DEFAULT;
1496 *index = comp->llc_objects[stripe_number].l_ost_idx;
1503 * Get the pool name of layout \a layout.
1505 * \param[in] layout layout to get pool name from
1506 * \param[out] dest buffer to store pool name in
1507 * \param[in] n size in bytes of buffer \a dest
1509 * \retval 0 on success
1510 * \retval -1 if arguments are invalid
1512 int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
1515 struct llapi_layout_comp *comp;
1517 comp = __llapi_layout_cur_comp(layout);
1526 strncpy(dest, comp->llc_pool_name, n);
1532 * Set the name of the pool of layout \a layout.
1534 * \param[in] layout layout to set pool name in
1535 * \param[in] pool_name pool name to set
1537 * \retval 0 on success
1538 * \retval -1 if arguments are invalid or pool name is too long
1540 int llapi_layout_pool_name_set(struct llapi_layout *layout,
1543 struct llapi_layout_comp *comp;
1545 comp = __llapi_layout_cur_comp(layout);
1549 if (!llapi_pool_name_is_valid(&pool_name, NULL)) {
1554 strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name));
1559 * Open and possibly create a file with a given \a layout.
1561 * If \a layout is NULL this function acts as a simple wrapper for
1562 * open(). By convention, ENOTTY is returned in errno if \a path
1563 * refers to a non-Lustre file.
1565 * \param[in] path name of the file to open
1566 * \param[in] open_flags open() flags
1567 * \param[in] mode permissions to create file, filtered by umask
1568 * \param[in] layout layout to create new file with
1570 * \retval non-negative file descriptor on successful open
1571 * \retval -1 if an error occurred
1573 int llapi_layout_file_open(const char *path, int open_flags, mode_t mode,
1574 const struct llapi_layout *layout)
1579 struct lov_user_md *lum;
1583 (layout != NULL && layout->llot_magic != LLAPI_LAYOUT_MAGIC)) {
1589 rc = llapi_layout_sanity((struct llapi_layout *)layout,
1591 !!(layout->llot_mirror_count > 1));
1593 llapi_layout_sanity_perror(rc);
1598 /* Object creation must be postponed until after layout attributes
1599 * have been applied. */
1600 if (layout != NULL && (open_flags & O_CREAT))
1601 open_flags |= O_LOV_DELAY_CREATE;
1603 fd = open(path, open_flags, mode);
1605 if (layout == NULL || fd < 0)
1608 lum = llapi_layout_to_lum(layout);
1617 if (lum->lmm_magic == LOV_USER_MAGIC_COMP_V1)
1618 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
1619 else if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC)
1620 lum_size = lov_user_md_size(lum->lmm_stripe_count,
1623 lum_size = lov_user_md_size(0, lum->lmm_magic);
1625 rc = fsetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_size, 0);
1634 errno = errno == EOPNOTSUPP ? ENOTTY : errno;
/**
 * Create a file with a given \a layout.
 *
 * Force O_CREAT and O_EXCL flags on so caller is assured that file was
 * created with the given \a layout on successful function return.
 *
 * \param[in] path		name of the file to open
 * \param[in] open_flags	open() flags
 * \param[in] mode		permissions to create new file with
 * \param[in] layout		layout to create new file with
 *
 * \retval	non-negative file descriptor on successful open
 * \retval	-1 if an error occurred
 */
int llapi_layout_file_create(const char *path, int open_flags, int mode,
			     const struct llapi_layout *layout)
{
	const int create_flags = open_flags | O_CREAT | O_EXCL;

	return llapi_layout_file_open(path, create_flags, mode, layout);
}
1660 int llapi_layout_flags_get(struct llapi_layout *layout, uint32_t *flags)
1662 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1667 *flags = layout->llot_flags;
1672 * Set flags to the header of a component layout.
1674 int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
1676 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1681 layout->llot_flags = flags;
1685 const char *llapi_layout_flags_string(uint32_t flags)
1687 switch (flags & LCM_FL_FLR_MASK) {
1690 case LCM_FL_WRITE_PENDING:
1692 case LCM_FL_SYNC_PENDING:
1699 __u16 llapi_layout_string_flags(char *string)
1701 if (strncmp(string, "ro", strlen(string)) == 0)
1702 return LCM_FL_RDONLY;
1703 if (strncmp(string, "wp", strlen(string)) == 0)
1704 return LCM_FL_WRITE_PENDING;
1705 if (strncmp(string, "sp", strlen(string)) == 0)
1706 return LCM_FL_SYNC_PENDING;
1712 * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
1713 * @count: Mirror count value to be checked.
1715 * This function checks the validity of mirror count.
1717 * Return: true on success or false on failure.
1719 static bool llapi_layout_mirror_count_is_valid(uint16_t count)
1721 return count >= 0 && count <= LUSTRE_MIRROR_COUNT_MAX;
1725 * llapi_layout_mirror_count_get() - Get mirror count from the header of
1727 * @layout: Layout to get mirror count from.
1728 * @count: Returned mirror count value.
1730 * This function gets mirror count from the header of a layout.
1732 * Return: 0 on success or -1 on failure.
1734 int llapi_layout_mirror_count_get(struct llapi_layout *layout,
1737 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1742 *count = layout->llot_mirror_count;
1747 * llapi_layout_mirror_count_set() - Set mirror count to the header of a layout.
1748 * @layout: Layout to set mirror count in.
1749 * @count: Mirror count value to be set.
1751 * This function sets mirror count to the header of a layout.
1753 * Return: 0 on success or -1 on failure.
1755 int llapi_layout_mirror_count_set(struct llapi_layout *layout,
1758 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1763 if (!llapi_layout_mirror_count_is_valid(count)) {
1768 layout->llot_mirror_count = count;
1773 * Fetch the start and end offset of the current layout component.
1775 * \param[in] layout the layout component
1776 * \param[out] start extent start, inclusive
1777 * \param[out] end extent end, exclusive
1779 * \retval 0 on success
1780 * \retval <0 if error occurs
1782 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
1783 uint64_t *start, uint64_t *end)
1785 struct llapi_layout_comp *comp;
1787 comp = __llapi_layout_cur_comp(layout);
1791 if (start == NULL || end == NULL) {
1796 *start = comp->llc_extent.e_start;
1797 *end = comp->llc_extent.e_end;
1803 * Set the layout extent of a layout.
1805 * \param[in] layout the layout to be set
1806 * \param[in] start extent start, inclusive
1807 * \param[in] end extent end, exclusive
1809 * \retval 0 on success
1810 * \retval <0 if error occurs
1812 int llapi_layout_comp_extent_set(struct llapi_layout *layout,
1813 uint64_t start, uint64_t end)
1815 struct llapi_layout_comp *comp;
1817 comp = __llapi_layout_cur_comp(layout);
1826 comp->llc_extent.e_start = start;
1827 comp->llc_extent.e_end = end;
1828 layout->llot_is_composite = true;
1834 * Gets the attribute flags of the current component.
1836 * \param[in] layout the layout component
1837 * \param[out] flags stored the returned component flags
1839 * \retval 0 on success
1840 * \retval <0 if error occurs
1842 int llapi_layout_comp_flags_get(const struct llapi_layout *layout,
1845 struct llapi_layout_comp *comp;
1847 comp = __llapi_layout_cur_comp(layout);
1851 if (flags == NULL) {
1856 *flags = comp->llc_flags;
1862 * Sets the specified flags of the current component leaving other flags as-is.
1864 * \param[in] layout the layout component
1865 * \param[in] flags component flags to be set
1867 * \retval 0 on success
1868 * \retval <0 if error occurs
1870 int llapi_layout_comp_flags_set(struct llapi_layout *layout, uint32_t flags)
1872 struct llapi_layout_comp *comp;
1874 comp = __llapi_layout_cur_comp(layout);
1878 comp->llc_flags |= flags;
1884 * Clears the flags specified in the flags leaving other flags as-is.
1886 * \param[in] layout the layout component
1887 * \param[in] flags component flags to be cleared
1889 * \retval 0 on success
1890 * \retval <0 if error occurs
1892 int llapi_layout_comp_flags_clear(struct llapi_layout *layout,
1895 struct llapi_layout_comp *comp;
1897 comp = __llapi_layout_cur_comp(layout);
1901 comp->llc_flags &= ~flags;
1907 * Fetches the file-unique component ID of the current layout component.
1909 * \param[in] layout the layout component
1910 * \param[out] id stored the returned component ID
1912 * \retval 0 on success
1913 * \retval <0 if error occurs
1915 int llapi_layout_comp_id_get(const struct llapi_layout *layout, uint32_t *id)
1917 struct llapi_layout_comp *comp;
1919 comp = __llapi_layout_cur_comp(layout);
1933 * Return the mirror id of the current layout component.
1935 * \param[in] layout the layout component
1936 * \param[out] id stored the returned mirror ID
1938 * \retval 0 on success
1939 * \retval <0 if error occurs
1941 int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id)
1943 struct llapi_layout_comp *comp;
1945 comp = __llapi_layout_cur_comp(layout);
1954 *id = mirror_id_of(comp->llc_id);
1960 * Adds a component to \a layout, the new component will be added to
1961 * the tail of components list and it'll inherit attributes of existing
1962 * ones. The \a layout will change it's current component pointer to
1963 * the newly added component, and it'll be turned into a composite
1964 * layout if it was not before the adding.
1966 * \param[in] layout existing composite or plain layout
1968 * \retval 0 on success
1969 * \retval <0 if error occurs
1971 int llapi_layout_comp_add(struct llapi_layout *layout)
1973 struct llapi_layout_comp *last, *comp, *new;
1974 bool composite = layout->llot_is_composite;
1976 comp = __llapi_layout_cur_comp(layout);
1980 new = __llapi_comp_alloc(0);
1984 last = list_entry(layout->llot_comp_list.prev, typeof(*last),
1987 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1989 /* We must mark the layout composite for the sanity check, but it may
1990 * not stay that way if the check fails */
1991 layout->llot_is_composite = true;
1992 layout->llot_cur_comp = new;
1994 /* We need to set a temporary non-zero value for "end" when we call
1995 * comp_extent_set, so we use LUSTRE_EOF-1, which is > all allowed
1996 * for the end of the previous component. (If we're adding this
1997 * component, the end of the previous component cannot be EOF.) */
1998 if (llapi_layout_comp_extent_set(layout, last->llc_extent.e_end,
2000 llapi_layout_comp_del(layout);
2001 layout->llot_is_composite = composite;
2008 * Adds a first component of a mirror to \a layout.
2009 * The \a layout will change it's current component pointer to
2010 * the newly added component, and it'll be turned into a composite
2011 * layout if it was not before the adding.
2013 * \param[in] layout existing composite or plain layout
2015 * \retval 0 on success
2016 * \retval <0 if error occurs
2018 int llapi_layout_add_first_comp(struct llapi_layout *layout)
2020 struct llapi_layout_comp *comp, *new;
2022 comp = __llapi_layout_cur_comp(layout);
2026 new = __llapi_comp_alloc(0);
2030 new->llc_extent.e_start = 0;
2032 list_add_tail(&new->llc_list, &layout->llot_comp_list);
2033 layout->llot_cur_comp = new;
2034 layout->llot_is_composite = true;
2040 * Deletes current component from the composite layout. The component
2041 * to be deleted must be the tail of components list, and it can't be
2042 * the only component in the layout.
2044 * \param[in] layout composite layout
2046 * \retval 0 on success
2047 * \retval <0 if error occurs
2049 int llapi_layout_comp_del(struct llapi_layout *layout)
2051 struct llapi_layout_comp *comp;
2053 comp = __llapi_layout_cur_comp(layout);
2057 if (!layout->llot_is_composite) {
2062 /* It must be the tail of the list (for PFL, can be relaxed
2063 * once we get mirrored components) */
2064 if (comp->llc_list.next != &layout->llot_comp_list) {
2068 layout->llot_cur_comp =
2069 list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
2070 if (comp->llc_list.prev == &layout->llot_comp_list)
2071 layout->llot_cur_comp = NULL;
2073 list_del_init(&comp->llc_list);
2074 __llapi_comp_free(comp);
2080 * Move the current component pointer to the component with
2081 * specified component ID.
2083 * \param[in] layout composite layout
2084 * \param[in] id component ID
2086 * \retval =0 : moved successfully
2087 * \retval <0 if error occurs
2089 int llapi_layout_comp_use_id(struct llapi_layout *layout, uint32_t comp_id)
2091 struct llapi_layout_comp *comp;
2093 comp = __llapi_layout_cur_comp(layout);
2095 return -1; /* use previously set errno */
2097 if (!layout->llot_is_composite) {
2102 if (comp_id == LCME_ID_INVAL) {
2107 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
2108 if (comp->llc_id == comp_id) {
2109 layout->llot_cur_comp = comp;
2118 * Move the current component pointer to a specified position.
2120 * \param[in] layout composite layout
2121 * \param[in] pos the position to be moved, it can be:
2122 * LLAPI_LAYOUT_COMP_USE_FIRST: use first component
2123 * LLAPI_LAYOUT_COMP_USE_LAST: use last component
2124 * LLAPI_LAYOUT_COMP_USE_NEXT: use component after current
2125 * LLAPI_LAYOUT_COMP_USE_PREV: use component before current
2127 * \retval =0 : moved successfully
2128 * \retval =1 : at last component with NEXT, at first component with PREV
2129 * \retval <0 if error occurs
2131 int llapi_layout_comp_use(struct llapi_layout *layout,
2132 enum llapi_layout_comp_use pos)
2134 struct llapi_layout_comp *comp, *head, *tail;
2136 comp = __llapi_layout_cur_comp(layout);
2140 if (!layout->llot_is_composite) {
2141 if (pos == LLAPI_LAYOUT_COMP_USE_FIRST ||
2142 pos == LLAPI_LAYOUT_COMP_USE_LAST)
2148 head = list_entry(layout->llot_comp_list.next, typeof(*head), llc_list);
2149 tail = list_entry(layout->llot_comp_list.prev, typeof(*tail), llc_list);
2151 case LLAPI_LAYOUT_COMP_USE_FIRST:
2152 layout->llot_cur_comp = head;
2154 case LLAPI_LAYOUT_COMP_USE_NEXT:
2159 layout->llot_cur_comp = list_entry(comp->llc_list.next,
2160 typeof(*comp), llc_list);
2162 case LLAPI_LAYOUT_COMP_USE_LAST:
2163 layout->llot_cur_comp = tail;
2165 case LLAPI_LAYOUT_COMP_USE_PREV:
2170 layout->llot_cur_comp = list_entry(comp->llc_list.prev,
2171 typeof(*comp), llc_list);
2182 * Add layout component(s) to an existing file.
2184 * \param[in] path The path name of the file
2185 * \param[in] layout The layout component(s) to be added
2187 int llapi_layout_file_comp_add(const char *path,
2188 const struct llapi_layout *layout)
2190 int rc, fd = -1, lum_size, tmp_errno = 0;
2191 struct llapi_layout *existing_layout = NULL;
2192 struct lov_user_md *lum = NULL;
2194 if (path == NULL || layout == NULL ||
2195 layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
2200 fd = open(path, O_RDWR);
2207 existing_layout = llapi_layout_get_by_fd(fd, 0);
2208 if (existing_layout == NULL) {
2214 rc = llapi_layout_merge(&existing_layout, layout);
2221 rc = llapi_layout_sanity(existing_layout, path, false, false);
2224 llapi_layout_sanity_perror(rc);
2229 lum = llapi_layout_to_lum(layout);
2236 if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) {
2241 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2243 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0);
2253 llapi_layout_free(existing_layout);
2259 * Delete component(s) by the specified component id or component flags
2260 * from an existing file.
2262 * \param[in] path path name of the file
2263 * \param[in] id unique component ID
2264 * \param[in] flags flags: LCME_FL_* or;
2265 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2267 int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags)
2269 int rc = 0, fd = -1, lum_size, tmp_errno = 0;
2270 struct llapi_layout *layout;
2271 struct llapi_layout_comp *comp, *next;
2272 struct llapi_layout *existing_layout = NULL;
2273 struct lov_user_md *lum = NULL;
2275 if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) {
2280 /* Can only specify ID or flags, not both, not none. */
2281 if ((id != LCME_ID_INVAL && flags != 0) ||
2282 (id == LCME_ID_INVAL && flags == 0)) {
2287 layout = llapi_layout_alloc();
2291 llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF);
2292 comp = __llapi_layout_cur_comp(layout);
2300 comp->llc_flags = flags;
2302 lum = llapi_layout_to_lum(layout);
2308 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2310 fd = open(path, O_RDWR);
2317 existing_layout = llapi_layout_get_by_fd(fd, 0);
2318 if (existing_layout == NULL) {
2326 while (rc == 0 && existing_layout->llot_cur_comp != NULL) {
2327 rc = llapi_layout_comp_use(existing_layout, comp ?
2328 LLAPI_LAYOUT_COMP_USE_PREV :
2329 LLAPI_LAYOUT_COMP_USE_LAST);
2334 comp = __llapi_layout_cur_comp(existing_layout);
2340 if (id != LCME_ID_INVAL && id != comp->llc_id)
2342 else if ((flags & LCME_FL_NEG) && (flags & comp->llc_flags))
2344 else if (flags && !(flags & comp->llc_flags))
2347 rc = llapi_layout_comp_del(existing_layout);
2348 /* the layout position is moved to previous one, adjust */
2356 rc = llapi_layout_sanity(existing_layout, path, false, false);
2359 llapi_layout_sanity_perror(rc);
2364 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0);
2375 llapi_layout_free(layout);
2376 llapi_layout_free(existing_layout);
2382 /* Internal utility function to apply flags for sanity checking */
2383 static void llapi_layout_comp_apply_flags(struct llapi_layout_comp *comp,
2386 if (flags & LCME_FL_NEG)
2387 comp->llc_flags &= ~flags;
2389 comp->llc_flags |= flags;
2392 struct llapi_layout_apply_flags_args {
2394 uint32_t *lfa_flags;
2400 static int llapi_layout_apply_flags_cb(struct llapi_layout *layout,
2403 struct llapi_layout_apply_flags_args *args = arg;
2404 struct llapi_layout_comp *comp;
2407 comp = __llapi_layout_cur_comp(layout);
2410 return LLAPI_LAYOUT_ITER_STOP;
2413 for (i = 0; i < args->lfa_count; i++) {
2414 if (comp->llc_id == args->lfa_ids[i])
2415 llapi_layout_comp_apply_flags(comp, args->lfa_flags[i]);
2418 return LLAPI_LAYOUT_ITER_CONT;
2421 /* Apply flags to the layout for sanity checking */
2422 static int llapi_layout_apply_flags(struct llapi_layout *layout, uint32_t *ids,
2423 uint32_t *flags, int count)
2425 struct llapi_layout_apply_flags_args args;
2428 if (!ids || !flags || count == 0) {
2434 args.lfa_flags = flags;
2435 args.lfa_count = count;
2438 rc = llapi_layout_comp_iterate(layout,
2439 llapi_layout_apply_flags_cb,
2441 if (errno == ENOENT)
2444 if (rc != LLAPI_LAYOUT_ITER_CONT)
2450 * Change flags by component ID of components of an existing file.
2451 * The component to be modified is specified by the comp->lcme_id value,
2452 * which must be a unique component ID.
2454 * \param[in] path path name of the file
2455 * \param[in] ids An array of component IDs
2456 * \param[in] flags flags: LCME_FL_* or;
2457 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2458 * \param[in] count Number of elements in ids and flags array
2460 int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
2463 int rc = -1, fd = -1, i, tmp_errno = 0;
2465 struct llapi_layout *existing_layout = NULL;
2466 struct llapi_layout *layout = NULL;
2467 struct llapi_layout_comp *comp;
2468 struct lov_user_md *lum = NULL;
2478 for (i = 0; i < count; i++) {
2479 if (!ids[i] || !flags[i]) {
2484 if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
2489 /* do not allow to set or clear INIT flag */
2490 if (flags[i] & LCME_FL_INIT) {
2496 fd = open(path, O_RDWR);
2503 existing_layout = llapi_layout_get_by_fd(fd, 0);
2504 if (existing_layout == NULL) {
2510 if (llapi_layout_apply_flags(existing_layout, ids, flags, count)) {
2516 rc = llapi_layout_sanity(existing_layout, path, false, false);
2519 llapi_layout_sanity_perror(rc);
2524 layout = __llapi_layout_alloc();
2525 if (layout == NULL) {
2531 layout->llot_is_composite = true;
2532 for (i = 0; i < count; i++) {
2533 comp = __llapi_comp_alloc(0);
2540 comp->llc_id = ids[i];
2541 comp->llc_flags = flags[i];
2543 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
2544 layout->llot_cur_comp = comp;
2547 lum = llapi_layout_to_lum(layout);
2554 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2556 /* flush cached pages from clients */
2557 rc = llapi_file_flush(fd);
2564 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
2577 llapi_layout_free(existing_layout);
2578 llapi_layout_free(layout);
2584 * Check if the file layout is composite.
2586 * \param[in] layout the file layout to check
2588 * \retval true composite
2589 * \retval false not composite
2591 bool llapi_layout_is_composite(struct llapi_layout *layout)
2593 return layout->llot_is_composite;
2597 * Iterate every components in the @layout and call callback function @cb.
2599 * \param[in] layout component layout list.
2600 * \param[in] cb callback for each component
2601 * \param[in] cbdata callback data
2603 * \retval < 0 error happens during the iteration
2604 * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error
2605 * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration
2607 int llapi_layout_comp_iterate(struct llapi_layout *layout,
2608 llapi_layout_iter_cb cb, void *cbdata)
2612 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2617 * make sure on success llapi_layout_comp_use() API returns 0 with
2623 rc = cb(layout, cbdata);
2624 if (rc != LLAPI_LAYOUT_ITER_CONT)
2627 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2630 else if (rc == 1) /* reached the last comp */
2631 return LLAPI_LAYOUT_ITER_CONT;
2638 * llapi_layout_merge() - Merge a composite layout into another one.
2639 * @dst_layout: Destination composite layout.
2640 * @src_layout: Source composite layout.
2642 * This function copies all of the components from @src_layout and
2643 * appends them to @dst_layout.
2645 * Return: 0 on success or -1 on failure.
2647 int llapi_layout_merge(struct llapi_layout **dst_layout,
2648 const struct llapi_layout *src_layout)
2650 struct llapi_layout *new_layout = *dst_layout;
2651 struct llapi_layout_comp *new = NULL;
2652 struct llapi_layout_comp *comp = NULL;
2655 if (src_layout == NULL ||
2656 list_empty((struct list_head *)&src_layout->llot_comp_list))
2659 if (new_layout == NULL) {
2660 new_layout = __llapi_layout_alloc();
2661 if (new_layout == NULL) {
2667 list_for_each_entry(comp, &src_layout->llot_comp_list, llc_list) {
2668 new = __llapi_comp_alloc(0);
2674 new->llc_pattern = comp->llc_pattern;
2675 new->llc_stripe_size = comp->llc_stripe_size;
2676 new->llc_stripe_count = comp->llc_stripe_count;
2677 new->llc_stripe_offset = comp->llc_stripe_offset;
2679 if (comp->llc_pool_name[0] != '\0')
2680 strncpy(new->llc_pool_name, comp->llc_pool_name,
2681 sizeof(new->llc_pool_name));
2683 for (i = 0; i < comp->llc_objects_count; i++) {
2684 if (__llapi_comp_objects_realloc(new,
2685 stripe_number_roundup(i)) < 0) {
2687 __llapi_comp_free(new);
2690 new->llc_objects[i].l_ost_idx = \
2691 comp->llc_objects[i].l_ost_idx;
2694 new->llc_objects_count = comp->llc_objects_count;
2695 new->llc_extent.e_start = comp->llc_extent.e_start;
2696 new->llc_extent.e_end = comp->llc_extent.e_end;
2697 new->llc_id = comp->llc_id;
2698 new->llc_flags = comp->llc_flags;
2700 list_add_tail(&new->llc_list, &new_layout->llot_comp_list);
2701 new_layout->llot_cur_comp = new;
2703 new_layout->llot_is_composite = true;
2705 *dst_layout = new_layout;
2708 llapi_layout_free(new_layout);
2713 * Get the last initialized component
2715 * \param[in] layout component layout list.
2718 * \retval -EINVAL not found
2719 * \retval -EISDIR directory layout
2721 int llapi_layout_get_last_init_comp(struct llapi_layout *layout)
2723 struct llapi_layout_comp *comp = NULL, *head = NULL;
2725 if (!layout->llot_is_composite)
2728 head = list_entry(layout->llot_comp_list.next, typeof(*comp), llc_list);
2731 if (head->llc_id == 0 && !(head->llc_flags & LCME_FL_INIT))
2735 /* traverse the components from the tail to find the last init one */
2736 comp = list_entry(layout->llot_comp_list.prev, typeof(*comp), llc_list);
2737 while (comp != head) {
2738 if (comp->llc_flags & LCME_FL_INIT)
2740 comp = list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
2743 layout->llot_cur_comp = comp;
2745 return comp->llc_flags & LCME_FL_INIT ? 0 : -EINVAL;
2749 * Interit stripe info from the file's component to the mirror
2751 * \param[in] layout file component layout list.
2752 * \param[in] layout mirro component layout list.
2754 * \retval 0 on success
2755 * \retval -EINVAL on error
2757 int llapi_layout_mirror_inherit(struct llapi_layout *f_layout,
2758 struct llapi_layout *m_layout)
2760 struct llapi_layout_comp *m_comp = NULL;
2761 struct llapi_layout_comp *f_comp = NULL;
2764 f_comp = __llapi_layout_cur_comp(f_layout);
2767 m_comp = __llapi_layout_cur_comp(m_layout);
2771 /* DoM component does not inherit stripe size */
2772 if (m_comp->llc_pattern != LLAPI_LAYOUT_MDT)
2773 m_comp->llc_stripe_size = f_comp->llc_stripe_size;
2774 m_comp->llc_stripe_count = f_comp->llc_stripe_count;
2780 * Find all stale components.
2782 * \param[in] layout component layout list.
2783 * \param[out] comp array of stale component info.
2784 * \param[in] comp_size array size of @comp.
2785 * \param[in] mirror_ids array of mirror id that only components
2786 * belonging to these mirror will be collected.
2787 * \param[in] ids_nr number of mirror ids array.
2789 * \retval number of component info collected on success or
2790 * an error code on failure.
2792 int llapi_mirror_find_stale(struct llapi_layout *layout,
2793 struct llapi_resync_comp *comp, size_t comp_size,
2794 __u16 *mirror_ids, int ids_nr)
2799 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2807 uint64_t start, end;
2809 rc = llapi_layout_comp_flags_get(layout, &flags);
2813 if (!(flags & LCME_FL_STALE))
2816 rc = llapi_layout_mirror_id_get(layout, &mirror_id);
2820 /* the caller only wants stale components from specific
2825 for (j = 0; j < ids_nr; j++) {
2826 if (mirror_ids[j] == mirror_id)
2830 /* not in the specified mirror */
2833 } else if (flags & LCME_FL_NOSYNC) {
2834 /* if not specified mirrors, do not resync "nosync"
2839 rc = llapi_layout_comp_id_get(layout, &id);
2843 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2847 /* pack this component into @comp array */
2848 comp[idx].lrc_id = id;
2849 comp[idx].lrc_mirror_id = mirror_id;
2850 comp[idx].lrc_start = start;
2851 comp[idx].lrc_end = end;
2854 if (idx >= comp_size) {
2860 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2867 return rc < 0 ? rc : idx;
2870 /* locate @layout to a valid component covering file [file_start, file_end) */
2871 int llapi_mirror_find(struct llapi_layout *layout, uint64_t file_start,
2872 uint64_t file_end, uint64_t *endp)
2874 uint32_t mirror_id = 0;
2877 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2883 uint64_t start, end;
2884 uint32_t flags, id, rid;
2886 rc = llapi_layout_comp_flags_get(layout, &flags);
2890 if (flags & LCME_FL_STALE)
2893 rc = llapi_layout_mirror_id_get(layout, &rid);
2897 rc = llapi_layout_comp_id_get(layout, &id);
2901 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2905 if (file_start >= start && file_start < end) {
2908 else if (mirror_id != rid || *endp != start)
2911 file_start = *endp = end;
2912 if (end >= file_end)
2917 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
/**
 * Resync mirror components by copying file data between \a start and \a end
 * from a source mirror into every component listed in \a comp_array.
 *
 * Flow, as far as the code below shows it:
 *  - a page-aligned 4 MiB bounce buffer is obtained with posix_memalign();
 *  - llapi_mirror_find() picks the source mirror for the current position
 *    and the resulting mirror end is clamped to \a end;
 *  - llapi_mirror_data_seek() is queried on the source mirror to obtain the
 *    next data range [data_off, data_end); a fallback path switches to a
 *    full copy (see the in-code comment);
 *  - a hole in front of that data range is replicated on each overlapping
 *    target with llapi_mirror_punch(), or with llapi_mirror_truncate() when
 *    the target component ends at OBD_OBJECT_EOF; if that fails the hole
 *    range is read and copied like data;
 *  - data is read with llapi_mirror_read() and written with
 *    llapi_mirror_write() to the part of each target component overlapping
 *    the chunk; both the read and the write length are rounded up to a page
 *    multiple to suit direct IO.
 *
 * While copying, lrc_synced is used with inverted meaning: it is set to true
 * when a write to that component fails. Before returning, every flag is
 * flipped so that true means "resynced"; on a fatal error all flags are
 * cleared instead. A component that was synced while the final position is
 * not page aligned is truncated back to that position; a failing truncate
 * clears its flag, except for -ENOKEY on an encryption unit boundary.
 *
 * \param[in]     fd         file descriptor handed to the llapi_mirror_*()
 *                           helpers
 * \param[in]     layout     layout searched by llapi_mirror_find()
 * \param[in,out] comp_array components to resync; lrc_synced is updated
 * \param[in]     comp_size  number of elements in \a comp_array
 * \param[in]     start      offset at which copying starts
 * \param[in]     end        upper bound of the range to resync
 *
 * NOTE(review): per the trailing comment the first error code is returned
 * for a partially successful resync - confirm the exact return contract.
 */
2927 int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
2928 struct llapi_resync_comp *comp_array,
2929 int comp_size, uint64_t start, uint64_t end)
2931 size_t page_size = sysconf(_SC_PAGESIZE);
2932 const size_t buflen = 4 << 20; /* 4M */
2934 uint64_t pos = start;
2935 uint64_t data_off = pos, data_end = pos;
2941 rc = posix_memalign(&buf, page_size, buflen);
2946 uint64_t mirror_end;
2951 if (pos >= data_end) {
2955 if (pos >= mirror_end || !src) {
2956 rc = llapi_mirror_find(layout, pos, end,
2961 /* restrict mirror end by resync end */
2962 mirror_end = MIN(end, mirror_end);
2965 tmp_off = llapi_mirror_data_seek(fd, src, pos,
2968 /* switch to full copy */
2969 to_read = mirror_end - pos;
2973 data_end = data_off + data_size;
2975 data_off = MIN(data_off, mirror_end);
2976 data_end = MIN(data_end, mirror_end);
2978 /* align by page, if there is data block to copy */
2980 data_off &= ~(page_size - 1);
2983 if (pos < data_off) {
2984 for (i = 0; i < comp_size; i++) {
2987 uint32_t mid = comp_array[i].lrc_mirror_id;
2989 /* skip non-overlapped component */
2990 if (pos >= comp_array[i].lrc_end ||
2991 data_off <= comp_array[i].lrc_start)
2994 if (pos < comp_array[i].lrc_start)
2995 cur_pos = comp_array[i].lrc_start;
2999 if (data_off > comp_array[i].lrc_end)
3000 to_punch = comp_array[i].lrc_end -
3003 to_punch = data_off - cur_pos;
3005 if (comp_array[i].lrc_end == OBD_OBJECT_EOF) {
3006 /* the last component can be truncated
3009 rc = llapi_mirror_truncate(fd, mid,
3011 /* hole at the end of file, so just
3012 * truncate up to set size.
3014 if (!rc && data_off == data_end)
3015 rc = llapi_mirror_truncate(fd,
3018 rc = llapi_mirror_punch(fd,
3019 comp_array[i].lrc_mirror_id,
3022 /* if failed then read failed hole range */
3026 if (pos + to_punch == data_off)
3027 to_read = data_end - pos;
3035 if (pos == mirror_end)
3037 to_read = data_end - pos;
3042 assert(data_end <= mirror_end);
3044 to_read = MIN(buflen, to_read);
3045 to_read = ((to_read - 1) | (page_size - 1)) + 1;
3046 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
3047 if (bytes_read == 0) {
3051 if (bytes_read < 0) {
3056 /* round up to page align to make direct IO happy. */
3057 to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
3059 for (i = 0; i < comp_size; i++) {
3062 size_t to_write2 = to_write;
3064 /* skip non-overlapped component */
3065 if (pos >= comp_array[i].lrc_end ||
3066 pos + to_write <= comp_array[i].lrc_start)
3069 if (pos < comp_array[i].lrc_start)
3070 pos2 = comp_array[i].lrc_start;
3072 to_write2 -= pos2 - pos;
3074 if ((pos + to_write) > comp_array[i].lrc_end)
3075 to_write2 -= pos + to_write -
3076 comp_array[i].lrc_end;
3078 written = llapi_mirror_write(fd,
3079 comp_array[i].lrc_mirror_id,
3084 * this component is not written successfully,
3085 * mark it using its lrc_synced, it is supposed
3086 * to be false before getting here.
3088 * And before this function returns, all
3089 * elements of comp_array will reverse their
3090 * lrc_synced flag to reflect their true
3093 comp_array[i].lrc_synced = true;
3094 llapi_error(LLAPI_MSG_ERROR, written,
3095 "component %u not synced",
3096 comp_array[i].lrc_id);
3101 assert(written == to_write2);
3109 /* fatal error happens */
3110 for (i = 0; i < comp_size; i++)
3111 comp_array[i].lrc_synced = false;
3116 * no fatal error happens, each lrc_synced tells whether the component
3117 * has been resync successfully (note: we'd reverse the value to
3118 * reflect its true meaning.
3120 for (i = 0; i < comp_size; i++) {
3121 comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
3122 if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
3123 rc = llapi_mirror_truncate(fd,
3124 comp_array[i].lrc_mirror_id, pos);
3125 /* Ignore truncate error on encrypted file without the
3126 * key if tried on LUSTRE_ENCRYPTION_UNIT_SIZE boundary.
3128 if (rc < 0 && (rc != -ENOKEY ||
3129 pos & ~LUSTRE_ENCRYPTION_MASK))
3130 comp_array[i].lrc_synced = false;
3135 * returns the first error code for partially successful resync if
/*
 * Result codes of the layout sanity checks. llapi_layout_sanity_cb() stores
 * one of these in llapi_layout_sanity_args::lsa_rc when a check fails, and
 * the values double as indices into llapi_layout_strerror[].
 * llapi_layout_sanity_perror() treats anything outside [0, LSE_LAST) as an
 * unrecognized error.
 */
3141 enum llapi_layout_comp_sanity_error {
3143 LSE_INCOMPLETE_MIRROR,
3144 LSE_ADJACENT_EXTENSION,
3148 LSE_DOM_EXTENSION_FOLLOWING,
3151 LSE_NOT_ZERO_LENGTH_EXTENDABLE,
3152 LSE_END_NOT_GREATER,
3153 LSE_ZERO_LENGTH_NORMAL,
3154 LSE_NOT_ADJACENT_PREV,
/*
 * Human-readable explanation for each llapi_layout_comp_sanity_error value,
 * indexed by the LSE_* code. llapi_layout_sanity_perror() prints these after
 * an "Invalid layout: " prefix, so every entry should read as a complete,
 * well-formed phrase.
 */
3162 const char *const llapi_layout_strerror[] =
3165 [LSE_INCOMPLETE_MIRROR] =
3166 "Incomplete mirror - must go to EOF",
3167 [LSE_ADJACENT_EXTENSION] =
3168 "No adjacent extension space components",
3169 [LSE_INIT_EXTENSION] =
3170 "Cannot apply extension flag to init components",
3173 [LSE_DOM_EXTENSION] =
3174 "DoM components can't be extension space",
3175 [LSE_DOM_EXTENSION_FOLLOWING] =
3176 "DoM components cannot be followed by extension space",
3178 "DoM component should be the first one in a file/mirror",
3179 [LSE_SET_COMP_START] =
3180 "Must set previous component extent before adding next",
3181 [LSE_NOT_ZERO_LENGTH_EXTENDABLE] =
3182 "Extendable component must start out zero-length",
3183 [LSE_END_NOT_GREATER] =
3184 "Component end is before end of previous component",
3185 [LSE_ZERO_LENGTH_NORMAL] =
3186 "Zero length components must be followed by extension",
3187 [LSE_NOT_ADJACENT_PREV] =
3188 "Components not adjacent (end != next->start)",
3189 [LSE_START_GT_END] =
3190 "Component start is > end",
3192 "The component end must be aligned by the stripe size",
3194 "The extension size must be aligned by the stripe size",
3196 "An unknown OST idx is specified",
/*
 * State shared between llapi_layout_sanity() and llapi_layout_sanity_cb().
 * lsa_fsname is filled in by llapi_search_fsname(); the OST index checks in
 * the callback run only when it is non-empty. lsa_incomplete tells the
 * callback whether the layout is still incomplete, which disables the
 * checks that are only valid for complete layouts. The callback also reads
 * lsa_flr to choose the permitted flag mask and reports the LSE_* result of
 * a failed check through lsa_rc.
 */
3199 struct llapi_layout_sanity_args {
3200 char lsa_fsname[MAX_OBD_NAME + 1];
3201 bool lsa_incomplete;
3207 /*
 * Mask of the LCME_FL_* component flags that users may set at
 * creation/modification time. llapi_layout_sanity_cb() reports LSE_FLAGS
 * when a component that is not yet on disk, in a complete FLR layout,
 * carries any flag outside this mask.
 */
3208 #define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
3212 * When modified, adjust llapi_stripe_param_verify() if needed as well.
/**
 * Per-component callback for llapi_layout_comp_iterate() that validates the
 * layout's current component against its neighbours in the component list.
 *
 * The checks below cover: mirror completeness (a component starting at 0
 * begins a new mirror, so the previous component must end at LUSTRE_EOF),
 * extension-flag rules, the flags permitted on on-disk versus user-supplied
 * components, DoM placement, extent ordering and adjacency, stripe-size
 * alignment of component ends and extension sizes, and - when a filesystem
 * name is present in the arguments - whether the requested OST indices are
 * accepted by llapi_layout_search_ost().
 *
 * \param[in]     layout  layout whose current component is examined
 * \param[in,out] arg     struct llapi_layout_sanity_args; lsa_rc receives
 *                        the LSE_* code of a failed check
 *
 * \retval LLAPI_LAYOUT_ITER_CONT  the component passed the checks
 * \retval LLAPI_LAYOUT_ITER_STOP  a check failed; errno is kept if already
 *                                 set, otherwise it is set to EINVAL
 *
 * The extension alignment check compares against the previous component's
 * stripe size, so it is only evaluated when a previous component exists;
 * every other use of \a prev in this function is NULL-guarded as well.
 */
3214 static int llapi_layout_sanity_cb(struct llapi_layout *layout,
3217 struct llapi_layout_comp *comp, *next, *prev;
3218 struct llapi_layout_sanity_args *args = arg;
3219 bool first_comp = false;
3221 comp = __llapi_layout_cur_comp(layout);
3227 if (comp->llc_list.prev != &layout->llot_comp_list)
3228 prev = list_entry(comp->llc_list.prev, typeof(*prev),
3233 if (comp->llc_list.next != &layout->llot_comp_list)
3234 next = list_entry(comp->llc_list.next, typeof(*next),
3239 /* Start of zero implies a new mirror */
3240 if (comp->llc_extent.e_start == 0) {
3242 /* Most checks apply only within one mirror, this is an
3244 if (prev && prev->llc_extent.e_end != LUSTRE_EOF) {
3245 args->lsa_rc = LSE_INCOMPLETE_MIRROR;
3252 if (next && next->llc_extent.e_start == 0)
3255 /* Flag sanity checks */
3256 /* No adjacent extension components */
3257 if ((comp->llc_flags & LCME_FL_EXTENSION) && next &&
3258 (next->llc_flags & LCME_FL_EXTENSION)) {
3259 args->lsa_rc = LSE_ADJACENT_EXTENSION;
3263 /* Extension flag cannot be applied to init components and the first
3264 * component of each mirror is automatically init */
3265 if ((comp->llc_flags & LCME_FL_EXTENSION) &&
3266 (comp->llc_flags & LCME_FL_INIT || first_comp)) {
3267 args->lsa_rc = LSE_INIT_EXTENSION;
3271 if (comp->llc_ondisk) {
3272 if (comp->llc_flags & LCME_FL_NEG)
3273 args->lsa_rc = LSE_FLAGS;
3274 } else if (!args->lsa_incomplete) {
3275 if (args->lsa_flr) {
3276 if (comp->llc_flags & ~LCME_USER_COMP_FLAGS)
3277 args->lsa_rc = LSE_FLAGS;
3279 if (comp->llc_flags &
3280 ~(LCME_FL_EXTENSION | LCME_FL_PREF_RW))
3281 args->lsa_rc = LSE_FLAGS;
3287 /* DoM sanity checks */
3288 if (comp->llc_pattern == LLAPI_LAYOUT_MDT ||
3289 comp->llc_pattern == LOV_PATTERN_MDT) {
3290 /* DoM components can't be extension components */
3291 if (comp->llc_flags & LCME_FL_EXTENSION) {
3292 args->lsa_rc = LSE_DOM_EXTENSION;
3295 /* DoM components cannot be followed by an extension comp */
3296 if (next && (next->llc_flags & LCME_FL_EXTENSION)) {
3297 args->lsa_rc = LSE_DOM_EXTENSION_FOLLOWING;
3301 /* DoM should be the first component in a mirror */
3303 args->lsa_rc = LSE_DOM_FIRST;
3309 /* Extent sanity checks */
3310 /* Must set previous component extent before adding another */
3311 if (prev && prev->llc_extent.e_start == 0 &&
3312 prev->llc_extent.e_end == 0) {
3313 args->lsa_rc = LSE_SET_COMP_START;
3317 if (!args->lsa_incomplete) {
3318 /* Components followed by extension space (extendable
3319 * components) must be zero length before initialization.
3320 * (Except for first comp, which will be initialized on
3322 if (next && (next->llc_flags & LCME_FL_EXTENSION) &&
3323 !first_comp && !(comp->llc_flags & LCME_FL_INIT) &&
3324 comp->llc_extent.e_start != comp->llc_extent.e_end) {
3325 args->lsa_rc = LSE_NOT_ZERO_LENGTH_EXTENDABLE;
3329 /* End must come after end of previous comp */
3330 if (prev && comp->llc_extent.e_end < prev->llc_extent.e_end) {
3331 args->lsa_rc = LSE_END_NOT_GREATER;
3335 /* Components not followed by ext space must have length > 0. */
3336 if (comp->llc_extent.e_start == comp->llc_extent.e_end &&
3337 (next == NULL || !(next->llc_flags & LCME_FL_EXTENSION))) {
3338 args->lsa_rc = LSE_ZERO_LENGTH_NORMAL;
3342 /* The component end must be aligned by the stripe size */
3343 if ((comp->llc_flags & LCME_FL_EXTENSION) && prev &&
3344 (prev->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3345 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3346 comp->llc_extent.e_end % prev->llc_stripe_size) {
3347 args->lsa_rc = LSE_ALIGN_END;
3350 if ((comp->llc_stripe_size * SEL_UNIT_SIZE) %
3351 prev->llc_stripe_size) {
3352 args->lsa_rc = LSE_ALIGN_EXT;
3355 } else if (!(comp->llc_flags & LCME_FL_EXTENSION) &&
3356 (comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3357 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3358 comp->llc_extent.e_end !=
3359 comp->llc_extent.e_start &&
3360 comp->llc_extent.e_end % comp->llc_stripe_size) {
3361 args->lsa_rc = LSE_ALIGN_END;
3367 /* Components must have start == prev->end */
3368 if (prev && comp->llc_extent.e_start != 0 &&
3369 comp->llc_extent.e_start != prev->llc_extent.e_end) {
3370 args->lsa_rc = LSE_NOT_ADJACENT_PREV;
3374 /* Components must have start <= end */
3375 if (comp->llc_extent.e_start > comp->llc_extent.e_end) {
3376 args->lsa_rc = LSE_START_GT_END;
3380 if (args->lsa_fsname[0] != '\0') {
3383 if (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC) {
3384 assert(comp->llc_stripe_count <=
3385 comp->llc_objects_count);
3387 for (i = 0; i < comp->llc_stripe_count && rc == 0; i++){
3388 if (comp->llc_objects[i].l_ost_idx ==
3389 LLAPI_LAYOUT_IDX_MAX) {
3393 rc = llapi_layout_search_ost(
3394 comp->llc_objects[i].l_ost_idx,
3395 comp->llc_pool_name, args->lsa_fsname);
3397 } else if (comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT) {
3398 rc = llapi_layout_search_ost(
3399 comp->llc_stripe_offset,
3400 comp->llc_pool_name, args->lsa_fsname);
3403 args->lsa_rc = LSE_UNKNOWN_OST;
3408 return LLAPI_LAYOUT_ITER_CONT;
3411 errno = errno ? errno : EINVAL;
3412 return LLAPI_LAYOUT_ITER_STOP;
3415 /*
 * Print an explanation of a layout sanity error.
 *
 * \param[in] error  code to explain; a value in [0, LSE_LAST) is printed as
 *                   "Invalid layout: " followed by the matching
 *                   llapi_layout_strerror[] entry, any other value is
 *                   reported as an unrecognized error together with its
 *                   number
 *
 * The message is written to stdout, not stderr.
 */
3416 void llapi_layout_sanity_perror(int error)
3418 if (error >= LSE_LAST || error < 0) {
3419 fprintf(stdout, "Invalid layout, unrecognized error: %d\n",
3422 fprintf(stdout, "Invalid layout: %s\n",
3423 llapi_layout_strerror[error]);
3427 /* Walk a layout and enforce sanity checks that apply to > 1 component
3429 * The core idea here is that of sanity checking individual tokens vs semantic
3431 * We cannot check everything at the individual component level ('token'),
3432 * instead we must check whether or not the full layout has a valid meaning.
3434 * An example of a component level check is "is stripe size valid?". That is
3435 * handled when setting stripe size.
3437 * An example of a layout level check is "are the extents of these components
3438 * valid when adjacent to one another", or "can we set these flags on adjacent
3441 * \param[in] layout component layout list.
3442 * \param[in] fname file that the layout is to be checked for
3443 * \param[in] incomplete if layout is complete or not - some checks can
3444 * only be done on complete layouts.
3445 * \param[in] flr set when this is called from FLR mirror create
3447 * \retval 0 on success; positive on a layout error (see
3448 * llapi_layout_sanity_perror() for the meaning); -1 on failure
/*
 * Implementation notes:
 *  - the layout's current component pointer is saved on entry and put back
 *    after the walk, so iterating does not move the caller's position;
 *  - llapi_search_fsname() resolves the filesystem name for fname into
 *    args.lsa_fsname, and a failure is logged as "not on a Lustre
 *    filesystem"; the callback runs its OST index checks only when that
 *    name is non-empty;
 *  - the layout is treated as FLR when mirror_id_of() of the current
 *    component's id is greater than zero;
 *  - every component is then checked by llapi_layout_sanity_cb() through
 *    llapi_layout_comp_iterate(), and any result other than
 *    LLAPI_LAYOUT_ITER_CONT takes the failure path.
 * NOTE(review): errno == ENOENT after the iteration is special-cased;
 * confirm what that branch is meant to tolerate.
 */
3450 int llapi_layout_sanity(struct llapi_layout *layout,
3455 struct llapi_layout_sanity_args args = { { 0 } };
3456 struct llapi_layout_comp *curr;
3462 curr = layout->llot_cur_comp;
3466 /* Make sure we are on a Lustre file system */
3468 rc = llapi_search_fsname(fname, args.lsa_fsname);
3470 llapi_error(LLAPI_MSG_ERROR, rc,
3471 "'%s' is not on a Lustre filesystem",
3480 args.lsa_incomplete = incomplete;
3482 /* When we modify an existing layout, this tells us if it's FLR */
3483 if (mirror_id_of(curr->llc_id) > 0)
3484 args.lsa_flr = true;
3487 rc = llapi_layout_comp_iterate(layout,
3488 llapi_layout_sanity_cb,
3490 if (errno == ENOENT)
3493 if (rc != LLAPI_LAYOUT_ITER_CONT)
3496 layout->llot_cur_comp = curr;
/**
 * Report the extent end of the first component of a composite layout when
 * that component uses an MDT (Data-on-MDT) pattern.
 *
 * The layout must be non-NULL and composite. The function selects the first
 * component with llapi_layout_comp_use(), which changes the layout's
 * current component, reads its pattern, and only for LOV_PATTERN_MDT or
 * LLAPI_LAYOUT_MDT fetches the extent with llapi_layout_comp_extent_get().
 *
 * \param[in]  layout  composite layout to inspect
 * \param[out] size    receives the end of the first component's extent
 *
 * NOTE(review): the values returned on the rejection paths are set in code
 * not shown alongside these checks - confirm before relying on them.
 */
3501 int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size)
3503 uint64_t pattern, start;
3506 if (!layout || !llapi_layout_is_composite(layout)) {
3511 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
3515 rc = llapi_layout_pattern_get(layout, &pattern);
3519 if (pattern != LOV_PATTERN_MDT && pattern != LLAPI_LAYOUT_MDT) {
3524 rc = llapi_layout_comp_extent_get(layout, &start, size);
/**
 * Return the size of the layout blob that \a lcm points to, dispatching on
 * its magic:
 *  - LOV_MAGIC_V1 / LOV_MAGIC_V3: the buffer is reinterpreted as a
 *    struct lov_user_md and the size is computed by lov_user_md_size()
 *    from its stripe count and magic;
 *  - LOV_MAGIC_FOREIGN: the buffer is reinterpreted as a
 *    struct lov_foreign_md and lfm_length is returned;
 *  - LOV_MAGIC_COMP_V1: lcm_size is returned.
 * Any other magic is rejected.
 *
 * \param[in] lcm  layout buffer; despite the parameter type it may hold any
 *                 of the formats listed above
 */
3532 int lov_comp_md_size(struct lov_comp_md_v1 *lcm)
3534 if (lcm->lcm_magic == LOV_MAGIC_V1 || lcm->lcm_magic == LOV_MAGIC_V3) {
3535 struct lov_user_md *lum = (void *)lcm;
3537 return lov_user_md_size(lum->lmm_stripe_count, lum->lmm_magic);
3540 if (lcm->lcm_magic == LOV_MAGIC_FOREIGN) {
3541 struct lov_foreign_md *lfm = (void *)lcm;
3543 return lfm->lfm_length;
3546 if (lcm->lcm_magic != LOV_MAGIC_COMP_V1)
3549 return lcm->lcm_size;
/**
 * Fetch the metadata returned by get_lmd_info_fd() for a file and copy the
 * requested parts out to the caller.
 *
 * The reply is received in an on-stack buffer of 64 KiB plus the fixed
 * header of struct lov_user_mds_data. From it the function copies
 * lmd_flags to \a valid, lmd_stx to \a statx and lmd_lmm (lmd_lmmsize
 * bytes) to \a lum.
 *
 * \param[in]  dir_fd   directory file descriptor
 * \param[in]  fname    file name; when provided it is relative to the
 *                      parent directory
 * \param[out] valid    receives lmd_flags
 * \param[out] statx    receives the statx data
 * \param[out] lum      receives the layout
 * \param[in]  lumsize  size of the \a lum buffer; when \a lum is given it
 *                      must be at least sizeof(*lum), and the function
 *                      checks whether the returned layout is larger than
 *                      this before copying it
 *
 * NOTE(review): the guards around the three copies and the error values
 * are on lines not shown with them - confirm which outputs are optional.
 */
3552 int llapi_get_lum_file_fd(int dir_fd, const char *fname, __u64 *valid,
3553 lstatx_t *statx, struct lov_user_md *lum,
3556 struct lov_user_mds_data *lmd;
3557 char buf[65536 + offsetof(typeof(*lmd), lmd_lmm)];
3561 if (lum && lumsize < sizeof(*lum))
3564 /* If a file name is provided, it is relative to the parent directory */
3570 lmd = (struct lov_user_mds_data *)buf;
3571 rc = get_lmd_info_fd(fname, parent_fd, dir_fd, buf, sizeof(buf),
3577 *valid = lmd->lmd_flags;
3580 memcpy(statx, &lmd->lmd_stx, sizeof(*statx));
3583 if (lmd->lmd_lmmsize > lumsize)
3585 memcpy(lum, &lmd->lmd_lmm, lmd->lmd_lmmsize);
/**
 * Directory variant of llapi_get_lum_file_fd(): forwards all arguments
 * unchanged and passes NULL as the file name, so the query applies to the
 * directory referred to by \a dir_fd itself.
 *
 * The return value is whatever llapi_get_lum_file_fd() returns.
 */
3591 int llapi_get_lum_dir_fd(int dir_fd, __u64 *valid, lstatx_t *statx,
3592 struct lov_user_md *lum, size_t lumsize)
3594 return llapi_get_lum_file_fd(dir_fd, NULL, valid, statx, lum, lumsize);
3597 int llapi_get_lum_file(const char *path, __u64 *valid, lstatx_t *statx,
3598 struct lov_user_md *lum, size_t lumsize)
3600 char parent[PATH_MAX];
3607 tmp = strrchr(path, '/');
3609 strncpy(parent, ".", sizeof(parent) - 1);
3612 strncpy(parent, path, tmp - path);
3613 offset = tmp - path - 1;
3614 parent[tmp - path] = 0;
3619 fname += offset + 2;
3621 dir_fd = open(parent, O_RDONLY);
3624 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3628 rc = llapi_get_lum_file_fd(dir_fd, fname, valid, statx, lum, lumsize);
3633 int llapi_get_lum_dir(const char *path, __u64 *valid, lstatx_t *statx,
3634 struct lov_user_md *lum, size_t lumsize)
3639 dir_fd = open(path, O_RDONLY);
3642 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3646 rc = llapi_get_lum_dir_fd(dir_fd, valid, statx, lum, lumsize);