4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the GNU Lesser General Public License
8 * (LGPL) version 2.1 or (at your discretion) any later version.
9 * (LGPL) version 2.1 accompanies this distribution, and is available at
10 * http://www.gnu.org/licenses/lgpl-2.1.html
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
20 * lustre/utils/liblustreapi_layout.c
22 * lustreapi library for layout calls for interacting with the layout of
23 * Lustre files while hiding details of the internal data structures
26 * Copyright (c) 2016, 2017, Intel Corporation.
28 * Author: Ned Bass <bass6@llnl.gov>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/xattr.h>

#include <libcfs/util/list.h>
#include <lustre/lustreapi.h>
#include "lustreapi_internal.h"
46 * Layout component, which contains all attributes of a plain
49 struct llapi_layout_comp {
51 uint64_t llc_stripe_size;
52 uint64_t llc_stripe_count;
53 uint64_t llc_stripe_offset;
54 /* Add 1 so user always gets back a null terminated string. */
55 char llc_pool_name[LOV_MAXPOOLNAME + 1];
56 /** Number of objects in llc_objects array if was initialized. */
57 uint32_t llc_objects_count;
58 struct lov_user_ost_data_v1 *llc_objects;
59 /* fields used only for composite layouts */
60 struct lu_extent llc_extent; /* [start, end) of component */
61 uint32_t llc_id; /* unique ID of component */
62 uint32_t llc_flags; /* LCME_FL_* flags */
63 uint64_t llc_timestamp; /* snapshot timestamp */
64 struct list_head llc_list; /* linked to the llapi_layout
70 * An Opaque data type abstracting the layout of a Lustre file.
73 uint32_t llot_magic; /* LLAPI_LAYOUT_MAGIC */
76 bool llot_is_composite;
77 uint16_t llot_mirror_count;
78 /* Cursor pointing to one of the components in llot_comp_list */
79 struct llapi_layout_comp *llot_cur_comp;
80 struct list_head llot_comp_list;
84 * Compute the number of elements in the lmm_objects array of \a lum
85 * with size \a lum_size.
87 * \param[in] lum the struct lov_user_md to check
88 * \param[in] lum_size the number of bytes in \a lum
90 * \retval number of elements in array lum->lmm_objects
92 static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
97 if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
100 if (lum->lmm_magic == __swab32(LOV_MAGIC_V1) ||
101 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
102 magic = __swab32(lum->lmm_magic);
104 magic = lum->lmm_magic;
106 base_size = lov_user_md_size(0, magic);
108 if (lum_size <= base_size)
111 return (lum_size - base_size) / sizeof(lum->lmm_objects[0]);
115 * Byte-swap the fields of struct lov_user_md.
117 * XXX Rather than duplicating swabbing code here, we should eventually
118 * refactor the needed functions in lustre/ptlrpc/pack_generic.c
119 * into a library that can be shared between kernel and user code.
122 llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
124 int i, j, ent_count, obj_count;
125 struct lov_comp_md_v1 *comp_v1 = NULL;
126 struct lov_comp_md_entry_v1 *ent;
127 struct lov_user_ost_data *lod;
129 if (lum->lmm_magic != __swab32(LOV_MAGIC_V1) &&
130 lum->lmm_magic != __swab32(LOV_MAGIC_V3) &&
131 lum->lmm_magic != __swab32(LOV_MAGIC_COMP_V1))
134 if (lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
135 comp_v1 = (struct lov_comp_md_v1 *)lum;
137 if (comp_v1 != NULL) {
138 comp_v1->lcm_magic = __swab32(comp_v1->lcm_magic);
139 comp_v1->lcm_size = __swab32(comp_v1->lcm_size);
140 comp_v1->lcm_layout_gen = __swab32(comp_v1->lcm_layout_gen);
141 comp_v1->lcm_flags = __swab16(comp_v1->lcm_flags);
142 comp_v1->lcm_entry_count = __swab16(comp_v1->lcm_entry_count);
143 ent_count = comp_v1->lcm_entry_count;
148 for (i = 0; i < ent_count; i++) {
149 if (comp_v1 != NULL) {
150 ent = &comp_v1->lcm_entries[i];
151 ent->lcme_id = __swab32(ent->lcme_id);
152 ent->lcme_flags = __swab32(ent->lcme_flags);
153 ent->lcme_timestamp = __swab64(ent->lcme_timestamp);
154 ent->lcme_extent.e_start = __swab64(ent->lcme_extent.e_start);
155 ent->lcme_extent.e_end = __swab64(ent->lcme_extent.e_end);
156 ent->lcme_offset = __swab32(ent->lcme_offset);
157 ent->lcme_size = __swab32(ent->lcme_size);
159 lum = (struct lov_user_md *)((char *)comp_v1 +
161 lum_size = ent->lcme_size;
163 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
165 lum->lmm_magic = __swab32(lum->lmm_magic);
166 lum->lmm_pattern = __swab32(lum->lmm_pattern);
167 lum->lmm_stripe_size = __swab32(lum->lmm_stripe_size);
168 lum->lmm_stripe_count = __swab16(lum->lmm_stripe_count);
169 lum->lmm_stripe_offset = __swab16(lum->lmm_stripe_offset);
171 if (lum->lmm_magic != LOV_MAGIC_V1) {
172 struct lov_user_md_v3 *v3;
173 v3 = (struct lov_user_md_v3 *)lum;
174 lod = v3->lmm_objects;
176 lod = lum->lmm_objects;
179 for (j = 0; j < obj_count; j++)
180 lod[j].l_ost_idx = __swab32(lod[j].l_ost_idx);
185 * (Re-)allocate llc_objects[] to \a num_stripes stripes.
187 * Copy over existing llc_objects[], if any, to the new llc_objects[].
189 * \param[in] layout existing layout to be modified
190 * \param[in] num_stripes number of stripes in new layout
192 * \retval 0 if the objects are re-allocated successfully
193 * \retval -1 on error with errno set
195 static int __llapi_comp_objects_realloc(struct llapi_layout_comp *comp,
196 unsigned int new_stripes)
198 struct lov_user_ost_data_v1 *new_objects;
201 if (new_stripes > LOV_MAX_STRIPE_COUNT) {
206 if (new_stripes == comp->llc_objects_count)
209 if (new_stripes != 0 && new_stripes <= comp->llc_objects_count)
212 new_objects = realloc(comp->llc_objects,
213 sizeof(*new_objects) * new_stripes);
214 if (new_objects == NULL && new_stripes != 0) {
219 for (i = comp->llc_objects_count; i < new_stripes; i++)
220 new_objects[i].l_ost_idx = LLAPI_LAYOUT_IDX_MAX;
222 comp->llc_objects = new_objects;
223 comp->llc_objects_count = new_stripes;
229 * Allocate storage for a llapi_layout_comp with \a num_stripes stripes.
231 * \param[in] num_stripes number of stripes in new layout
233 * \retval valid pointer if allocation succeeds
234 * \retval NULL if allocation fails
236 static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
238 struct llapi_layout_comp *comp;
240 if (num_stripes > LOV_MAX_STRIPE_COUNT) {
245 comp = calloc(1, sizeof(*comp));
251 comp->llc_objects = NULL;
252 comp->llc_objects_count = 0;
254 if (__llapi_comp_objects_realloc(comp, num_stripes) < 0) {
260 comp->llc_pattern = LLAPI_LAYOUT_DEFAULT;
261 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
262 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
263 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
264 comp->llc_pool_name[0] = '\0';
265 comp->llc_extent.e_start = 0;
266 comp->llc_extent.e_end = LUSTRE_EOF;
269 INIT_LIST_HEAD(&comp->llc_list);
275 * Free memory allocated for \a comp
277 * \param[in] comp previously allocated by __llapi_comp_alloc()
279 static void __llapi_comp_free(struct llapi_layout_comp *comp)
281 if (comp->llc_objects != NULL)
282 free(comp->llc_objects);
287 * Free memory allocated for \a layout.
289 * \param[in] layout previously allocated by llapi_layout_alloc()
291 void llapi_layout_free(struct llapi_layout *layout)
293 struct llapi_layout_comp *comp, *n;
298 list_for_each_entry_safe(comp, n, &layout->llot_comp_list, llc_list) {
299 list_del_init(&comp->llc_list);
300 __llapi_comp_free(comp);
306 * Allocate and initialize a llapi_layout structure.
308 * \retval valid llapi_layout pointer on success
309 * \retval NULL if memory allocation fails
311 static struct llapi_layout *__llapi_layout_alloc(void)
313 struct llapi_layout *layout;
315 layout = calloc(1, sizeof(*layout));
316 if (layout == NULL) {
322 layout->llot_magic = LLAPI_LAYOUT_MAGIC;
323 layout->llot_gen = 0;
324 layout->llot_flags = 0;
325 layout->llot_is_composite = false;
326 layout->llot_mirror_count = 1;
327 layout->llot_cur_comp = NULL;
328 INIT_LIST_HEAD(&layout->llot_comp_list);
334 * Allocate and initialize a new plain layout.
336 * \retval valid llapi_layout pointer on success
337 * \retval NULL if memory allocation fails
339 struct llapi_layout *llapi_layout_alloc(void)
341 struct llapi_layout_comp *comp;
342 struct llapi_layout *layout;
344 layout = __llapi_layout_alloc();
348 comp = __llapi_comp_alloc(0);
354 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
355 layout->llot_cur_comp = comp;
361 * Check if the given \a lum_size is large enough to hold the required
364 * \param[in] lum the struct lov_user_md to check
365 * \param[in] lum_size the number of bytes in \a lum
367 * \retval true the \a lum_size is too small
368 * \retval false the \a lum_size is large enough
370 static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
374 if (lum_size < sizeof(lum->lmm_magic))
377 if (lum->lmm_magic == LOV_MAGIC_V1 ||
378 lum->lmm_magic == __swab32(LOV_MAGIC_V1))
379 magic = LOV_MAGIC_V1;
380 else if (lum->lmm_magic == LOV_MAGIC_V3 ||
381 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
382 magic = LOV_MAGIC_V3;
383 else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
384 lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
385 magic = LOV_MAGIC_COMP_V1;
389 if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
390 return lum_size < lov_user_md_size(0, magic);
392 return lum_size < sizeof(struct lov_comp_md_v1);
395 /* Verify if the objects count in lum is consistent with the
396 * stripe count in lum. It applies to regular file only. */
397 static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
399 struct lov_comp_md_v1 *comp_v1 = NULL;
400 int i, ent_count, obj_count;
402 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
403 comp_v1 = (struct lov_comp_md_v1 *)lum;
404 ent_count = comp_v1->lcm_entry_count;
405 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
406 lum->lmm_magic == LOV_MAGIC_V3) {
412 for (i = 0; i < ent_count; i++) {
414 lum = (struct lov_user_md *)((char *)comp_v1 +
415 comp_v1->lcm_entries[i].lcme_offset);
416 lum_size = comp_v1->lcm_entries[i].lcme_size;
418 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
421 if (!(comp_v1->lcm_entries[i].lcme_flags &
422 LCME_FL_INIT) && obj_count != 0)
424 } else if (obj_count != lum->lmm_stripe_count) {
432 * Convert the data from a lov_user_md to a newly allocated llapi_layout.
433 * The caller is responsible for freeing the returned pointer.
435 * \param[in] lov_xattr LOV user metadata xattr to copy data from
436 * \param[in] lov_xattr_size size the lov_xattr_size passed in
437 * \param[in] flags flags to control how layout is retrieved
439 * \retval valid llapi_layout pointer on success
440 * \retval NULL if memory allocation fails
442 struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
443 ssize_t lov_xattr_size,
444 enum llapi_layout_get_flags flags)
446 struct lov_user_md *lum = lov_xattr;
447 struct lov_comp_md_v1 *comp_v1 = NULL;
448 struct lov_comp_md_entry_v1 *ent;
449 struct lov_user_md *v1;
450 struct llapi_layout *layout = NULL;
451 struct llapi_layout_comp *comp;
452 int i, ent_count = 0, obj_count;
454 if (lov_xattr == NULL || lov_xattr_size <= 0) {
459 /* Return an error if we got back a partial layout. */
460 if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
465 #if __BYTE_ORDER == __BIG_ENDIAN
466 if (flags & LLAPI_LAYOUT_GET_COPY) {
467 lum = malloc(lov_xattr_size);
472 memcpy(lum, lov_xattr, lov_xattr_size);
476 llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
478 #if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 16, 53, 0)
479 #define LLAPI_LXF_CHECK_OLD 0x0001
480 if (flags & LLAPI_LXF_CHECK_OLD)
481 flags = (flags & ~LLAPI_LXF_CHECK_OLD) | LLAPI_LAYOUT_GET_CHECK;
483 if ((flags & LLAPI_LAYOUT_GET_CHECK) &&
484 !llapi_layout_lum_valid(lum, lov_xattr_size)) {
489 layout = __llapi_layout_alloc();
490 if (layout == NULL) {
495 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
496 comp_v1 = (struct lov_comp_md_v1 *)lum;
497 ent_count = comp_v1->lcm_entry_count;
498 layout->llot_gen = comp_v1->lcm_layout_gen;
499 layout->llot_is_composite = true;
500 layout->llot_mirror_count = comp_v1->lcm_mirror_count + 1;
501 layout->llot_gen = comp_v1->lcm_layout_gen;
502 layout->llot_flags = comp_v1->lcm_flags;
503 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
504 lum->lmm_magic == LOV_MAGIC_V3) {
506 layout->llot_is_composite = false;
508 if (lov_xattr_size <= 0) {
517 if (ent_count == 0) {
522 v1 = (struct lov_user_md *)lum;
523 for (i = 0; i < ent_count; i++) {
524 if (comp_v1 != NULL) {
525 ent = &comp_v1->lcm_entries[i];
526 v1 = (struct lov_user_md *)((char *)comp_v1 +
528 lov_xattr_size = ent->lcme_size;
533 obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
534 comp = __llapi_comp_alloc(obj_count);
539 comp->llc_extent.e_start = ent->lcme_extent.e_start;
540 comp->llc_extent.e_end = ent->lcme_extent.e_end;
541 comp->llc_id = ent->lcme_id;
542 comp->llc_flags = ent->lcme_flags;
543 if (comp->llc_flags & LCME_FL_NOSYNC)
544 comp->llc_timestamp = ent->lcme_timestamp;
546 comp->llc_extent.e_start = 0;
547 comp->llc_extent.e_end = LUSTRE_EOF;
552 if (v1->lmm_pattern == LOV_PATTERN_RAID0)
553 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
554 else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
555 LOV_PATTERN_OVERSTRIPING))
556 comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING;
557 else if (v1->lmm_pattern == LOV_PATTERN_MDT)
558 comp->llc_pattern = LLAPI_LAYOUT_MDT;
560 /* Lustre only supports RAID0, overstripping
563 comp->llc_pattern = v1->lmm_pattern;
565 if (v1->lmm_stripe_size == 0)
566 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
568 comp->llc_stripe_size = v1->lmm_stripe_size;
570 if (v1->lmm_stripe_count == (typeof(v1->lmm_stripe_count))-1)
571 comp->llc_stripe_count = LLAPI_LAYOUT_WIDE;
572 else if (v1->lmm_stripe_count == 0)
573 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
575 comp->llc_stripe_count = v1->lmm_stripe_count;
577 if (v1->lmm_stripe_offset ==
578 (typeof(v1->lmm_stripe_offset))-1)
579 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
581 comp->llc_stripe_offset = v1->lmm_stripe_offset;
583 if (v1->lmm_magic != LOV_USER_MAGIC_V1) {
584 const struct lov_user_md_v3 *lumv3;
585 lumv3 = (struct lov_user_md_v3 *)v1;
586 snprintf(comp->llc_pool_name,
587 sizeof(comp->llc_pool_name),
588 "%s", lumv3->lmm_pool_name);
589 memcpy(comp->llc_objects, lumv3->lmm_objects,
590 obj_count * sizeof(lumv3->lmm_objects[0]));
592 const struct lov_user_md_v1 *lumv1;
593 lumv1 = (struct lov_user_md_v1 *)v1;
594 memcpy(comp->llc_objects, lumv1->lmm_objects,
595 obj_count * sizeof(lumv1->lmm_objects[0]));
599 comp->llc_stripe_offset =
600 comp->llc_objects[0].l_ost_idx;
602 comp->llc_ondisk = true;
603 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
604 layout->llot_cur_comp = comp;
608 if (lum != lov_xattr)
612 llapi_layout_free(layout);
617 __u32 llapi_pattern_to_lov(uint64_t pattern)
622 case LLAPI_LAYOUT_DEFAULT:
623 lov_pattern = LOV_PATTERN_RAID0;
625 case LLAPI_LAYOUT_RAID0:
626 lov_pattern = LOV_PATTERN_RAID0;
628 case LLAPI_LAYOUT_MDT:
629 lov_pattern = LOV_PATTERN_MDT;
631 case LLAPI_LAYOUT_OVERSTRIPING:
632 lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
635 lov_pattern = EINVAL;
642 * Convert the data from a llapi_layout to a newly allocated lov_user_md.
643 * The caller is responsible for freeing the returned pointer.
645 * \param[in] layout the layout to copy from
647 * \retval valid lov_user_md pointer on success
648 * \retval NULL if memory allocation fails or the layout is invalid
650 static struct lov_user_md *
651 llapi_layout_to_lum(const struct llapi_layout *layout)
653 struct llapi_layout_comp *comp;
654 struct lov_comp_md_v1 *comp_v1 = NULL;
655 struct lov_comp_md_entry_v1 *ent;
656 struct lov_user_md *lum = NULL;
661 if (layout == NULL ||
662 list_empty((struct list_head *)&layout->llot_comp_list)) {
667 /* Allocate header of lov_comp_md_v1 if necessary */
668 if (layout->llot_is_composite) {
671 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
674 lum_size = sizeof(*comp_v1) + comp_cnt * sizeof(*ent);
675 lum = calloc(lum_size, 1);
680 comp_v1 = (struct lov_comp_md_v1 *)lum;
681 comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
682 comp_v1->lcm_size = lum_size;
683 comp_v1->lcm_layout_gen = 0;
684 comp_v1->lcm_flags = layout->llot_flags;
685 comp_v1->lcm_entry_count = comp_cnt;
686 comp_v1->lcm_mirror_count = layout->llot_mirror_count - 1;
690 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
691 struct lov_user_md *blob;
694 int i, obj_count = 0;
695 struct lov_user_ost_data *lmm_objects;
696 uint64_t pattern = comp->llc_pattern;
698 if ((pattern & LLAPI_LAYOUT_SPECIFIC) != 0) {
699 if (comp->llc_objects_count <
700 comp->llc_stripe_count) {
704 magic = LOV_USER_MAGIC_SPECIFIC;
705 obj_count = comp->llc_stripe_count;
706 pattern &= ~LLAPI_LAYOUT_SPECIFIC;
707 } else if (strlen(comp->llc_pool_name) != 0) {
708 magic = LOV_USER_MAGIC_V3;
710 magic = LOV_USER_MAGIC_V1;
712 /* All stripes must be specified when the pattern contains
713 * LLAPI_LAYOUT_SPECIFIC */
714 for (i = 0; i < obj_count; i++) {
715 if (comp->llc_objects[i].l_ost_idx ==
716 LLAPI_LAYOUT_IDX_MAX) {
722 blob_size = lov_user_md_size(obj_count, magic);
723 blob = realloc(lum, lum_size + blob_size);
729 comp_v1 = (struct lov_comp_md_v1 *)lum;
730 blob = (struct lov_user_md *)((char *)lum + lum_size);
731 lum_size += blob_size;
734 blob->lmm_magic = magic;
735 blob->lmm_pattern = llapi_pattern_to_lov(pattern);
736 if (blob->lmm_pattern == EINVAL) {
741 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
742 blob->lmm_stripe_size = 0;
744 blob->lmm_stripe_size = comp->llc_stripe_size;
746 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
747 blob->lmm_stripe_count = 0;
748 else if (comp->llc_stripe_count == LLAPI_LAYOUT_WIDE)
749 blob->lmm_stripe_count = LOV_ALL_STRIPES;
751 blob->lmm_stripe_count = comp->llc_stripe_count;
753 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
754 blob->lmm_stripe_offset = -1;
756 blob->lmm_stripe_offset = comp->llc_stripe_offset;
758 if (magic == LOV_USER_MAGIC_V3 ||
759 magic == LOV_USER_MAGIC_SPECIFIC) {
760 struct lov_user_md_v3 *lumv3 =
761 (struct lov_user_md_v3 *)blob;
763 if (comp->llc_pool_name[0] != '\0') {
764 strncpy(lumv3->lmm_pool_name,
766 sizeof(lumv3->lmm_pool_name));
768 memset(lumv3->lmm_pool_name, 0,
769 sizeof(lumv3->lmm_pool_name));
771 lmm_objects = lumv3->lmm_objects;
773 lmm_objects = blob->lmm_objects;
776 for (i = 0; i < obj_count; i++)
777 lmm_objects[i].l_ost_idx =
778 comp->llc_objects[i].l_ost_idx;
780 if (layout->llot_is_composite) {
781 ent = &comp_v1->lcm_entries[ent_idx];
782 ent->lcme_id = comp->llc_id;
783 ent->lcme_flags = comp->llc_flags;
784 if (ent->lcme_flags & LCME_FL_NOSYNC)
785 ent->lcme_timestamp = comp->llc_timestamp;
786 ent->lcme_extent.e_start = comp->llc_extent.e_start;
787 ent->lcme_extent.e_end = comp->llc_extent.e_end;
788 ent->lcme_size = blob_size;
789 ent->lcme_offset = offset;
791 comp_v1->lcm_size += blob_size;
/**
 * Get the parent directory of a path.
 *
 * The path is copied into \a buf (truncated to fit) and cut at its last
 * '/'. A path whose only '/' is the leading one yields "/", and a path
 * without any '/' yields ".". The result is always NUL-terminated when
 * \a size is non-zero; nothing is written when \a size is 0.
 *
 * \param[in] path	path to get parent of
 * \param[out] buf	buffer in which to store parent path
 * \param[in] size	size in bytes of buffer \a buf
 */
static void get_parent_dir(const char *path, char *buf, size_t size)
{
	char *p;

	if (size == 0)
		return;

	/* strncpy() leaves the buffer unterminated when it truncates */
	strncpy(buf, path, size - 1);
	buf[size - 1] = '\0';

	p = strrchr(buf, '/');
	if (p == buf) {
		/* "/name": the parent is the root itself, not "" */
		p[1] = '\0';
	} else if (p != NULL) {
		*p = '\0';
	} else if (size >= 2) {
		buf[0] = '.';
		buf[1] = '\0';
	}
}
827 * Substitute unspecified attribute values in \a layout with values
828 * from fs global settings. (lov.stripesize, lov.stripecount,
831 * \param[in] layout layout to inherit values from
832 * \param[in] path file path of the filesystem
834 static void inherit_sys_attributes(struct llapi_layout *layout,
837 struct llapi_layout_comp *comp;
838 unsigned int ssize, scount, soffset;
841 rc = sattr_cache_get_defaults(NULL, path, &scount, &ssize, &soffset);
845 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
846 if (comp->llc_pattern == LLAPI_LAYOUT_DEFAULT)
847 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
848 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
849 comp->llc_stripe_size = ssize;
850 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
851 comp->llc_stripe_count = scount;
852 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
853 comp->llc_stripe_offset = soffset;
858 * Get the current component of \a layout.
860 * \param[in] layout layout to get current component
862 * \retval valid llapi_layout_comp pointer on success
863 * \retval NULL on error
865 static struct llapi_layout_comp *
866 __llapi_layout_cur_comp(const struct llapi_layout *layout)
868 struct llapi_layout_comp *comp;
870 if (layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
874 if (layout->llot_cur_comp == NULL) {
878 /* Verify data consistency */
879 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
880 if (comp == layout->llot_cur_comp)
887 * Test if any attributes of \a layout are specified.
889 * \param[in] layout the layout to check
891 * \retval true any attributes are specified
892 * \retval false all attributes are unspecified
894 static bool is_any_specified(const struct llapi_layout *layout)
896 struct llapi_layout_comp *comp;
898 comp = __llapi_layout_cur_comp(layout);
902 if (layout->llot_is_composite || layout->llot_mirror_count != 1)
905 return comp->llc_pattern != LLAPI_LAYOUT_DEFAULT ||
906 comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT ||
907 comp->llc_stripe_count != LLAPI_LAYOUT_DEFAULT ||
908 comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT ||
909 strlen(comp->llc_pool_name);
913 * Get the striping layout for the file referenced by file descriptor \a fd.
915 * If the filesystem does not support the "lustre." xattr namespace, the
916 * file must be on a non-Lustre filesystem, so set errno to ENOTTY per
917 * convention. If the file has no "lustre.lov" data, the file will
918 * inherit default values, so return a default layout.
920 * If the kernel gives us back less than the expected amount of data,
921 * we fail with errno set to EINTR.
923 * \param[in] fd open file descriptor
924 * \param[in] flags open file descriptor
926 * \retval valid llapi_layout pointer on success
927 * \retval NULL if an error occurs
929 struct llapi_layout *llapi_layout_get_by_fd(int fd,
930 enum llapi_layout_get_flags flags)
933 struct lov_user_md *lum;
934 struct llapi_layout *layout = NULL;
938 lum_len = XATTR_SIZE_MAX;
939 lum = malloc(lum_len);
943 bytes_read = fgetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_len);
944 if (bytes_read < 0) {
945 if (errno == EOPNOTSUPP)
947 else if (errno == ENODATA)
948 layout = llapi_layout_alloc();
952 /* Directories may have a positive non-zero lum->lmm_stripe_count
953 * yet have an empty lum->lmm_objects array. For non-directories the
954 * amount of data returned from the kernel must be consistent
955 * with the stripe count. */
956 if (fstat(fd, &st) < 0)
959 layout = llapi_layout_get_by_xattr(lum, bytes_read,
960 S_ISDIR(st.st_mode) ? 0 : LLAPI_LAYOUT_GET_CHECK);
967 * Get the expected striping layout for a file at \a path.
969 * Substitute expected inherited attribute values for unspecified
970 * attributes. Unspecified attributes may belong to directories and
971 * never-written-to files, and indicate that default values will be
972 * assigned when files are created or first written to. A default value
973 * is inherited from the parent directory if the attribute is specified
974 * there, otherwise it is inherited from the filesystem root.
975 * Unspecified attributes normally have the value LLAPI_LAYOUT_DEFAULT.
977 * The complete \a path need not refer to an existing file or directory,
978 * but some leading portion of it must reside within a lustre filesystem.
979 * A use case for this interface would be to obtain the literal striping
980 * values that would be assigned to a new file in a given directory.
982 * \param[in] path path for which to get the expected layout
984 * \retval valid llapi_layout pointer on success
985 * \retval NULL if an error occurs
987 static struct llapi_layout *llapi_layout_expected(const char *path)
989 struct llapi_layout *path_layout = NULL;
990 char donor_path[PATH_MAX];
995 fd = open(path, O_RDONLY);
996 if (fd < 0 && errno != ENOENT)
1002 path_layout = llapi_layout_get_by_fd(fd, 0);
1008 if (path_layout == NULL) {
1009 if (errno != ENODATA && errno != ENOENT)
1012 path_layout = llapi_layout_alloc();
1013 if (path_layout == NULL)
1017 if (is_any_specified(path_layout)) {
1018 inherit_sys_attributes(path_layout, path);
1022 llapi_layout_free(path_layout);
1024 rc = stat(path, &st);
1025 if (rc < 0 && errno != ENOENT)
1028 /* If path is a not a directory or doesn't exist, inherit layout
1029 * from parent directory. */
1030 if ((rc == 0 && !S_ISDIR(st.st_mode)) ||
1031 (rc < 0 && errno == ENOENT)) {
1032 get_parent_dir(path, donor_path, sizeof(donor_path));
1033 path_layout = llapi_layout_get_by_path(donor_path, 0);
1034 if (path_layout != NULL) {
1035 if (is_any_specified(path_layout)) {
1036 inherit_sys_attributes(path_layout, donor_path);
1039 llapi_layout_free(path_layout);
1043 /* Inherit layout from the filesystem root. */
1044 rc = llapi_search_mounts(path, 0, donor_path, NULL);
1047 path_layout = llapi_layout_get_by_path(donor_path, 0);
1048 if (path_layout == NULL)
1051 inherit_sys_attributes(path_layout, donor_path);
1056 * Get the striping layout for the file at \a path.
1058 * If \a flags contains LLAPI_LAYOUT_GET_EXPECTED, substitute
1059 * expected inherited attribute values for unspecified attributes. See
1060 * llapi_layout_expected().
1062 * \param[in] path path for which to get the layout
1063 * \param[in] flags flags to control how layout is retrieved
1065 * \retval valid llapi_layout pointer on success
1066 * \retval NULL if an error occurs
1068 struct llapi_layout *llapi_layout_get_by_path(const char *path,
1069 enum llapi_layout_get_flags flags)
1071 struct llapi_layout *layout = NULL;
1072 bool failed = false;
1077 if (flags & LLAPI_LAYOUT_GET_EXPECTED)
1078 return llapi_layout_expected(path);
1080 /* Always get layout in O_DIRECT */
1081 /* Allow fetching layout even without the key on encrypted files */
1082 open_flags = O_RDONLY | O_DIRECT | O_FILE_ENC;
1084 fd = open(path, open_flags);
1086 if (errno != EINVAL || failed)
1088 /* EINVAL is because a directory cannot be opened in O_DIRECT */
1089 open_flags = O_RDONLY | O_FILE_ENC;
1094 layout = llapi_layout_get_by_fd(fd, flags);
1103 * Get the layout for the file with FID \a fidstr in filesystem \a lustre_dir.
1105 * \param[in] lustre_dir path within Lustre filesystem containing \a fid
1106 * \param[in] fid Lustre identifier of file to get layout for
1108 * \retval valid llapi_layout pointer on success
1109 * \retval NULL if an error occurs
1111 struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir,
1112 const struct lu_fid *fid,
1113 enum llapi_layout_get_flags flags)
1117 int saved_msg_level = llapi_msg_get_level();
1118 struct llapi_layout *layout = NULL;
1120 /* Prevent llapi internal routines from writing to console
1121 * while executing this function, then restore previous message
1123 llapi_msg_set_level(LLAPI_MSG_OFF);
1124 fd = llapi_open_by_fid(lustre_dir, fid, O_RDONLY);
1125 llapi_msg_set_level(saved_msg_level);
1130 layout = llapi_layout_get_by_fd(fd, flags);
1139 * Get the stripe count of \a layout.
1141 * \param[in] layout layout to get stripe count from
1142 * \param[out] count integer to store stripe count in
1144 * \retval 0 on success
1145 * \retval -1 if arguments are invalid
1147 int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
1150 struct llapi_layout_comp *comp;
1152 comp = __llapi_layout_cur_comp(layout);
1156 if (count == NULL) {
1161 *count = comp->llc_stripe_count;
1167 * The llapi_layout API functions have these extra validity checks since
1168 * they use intuitively named macros to denote special behavior, whereas
1169 * the old API uses 0 and -1.
1172 bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
1174 return stripe_count == LLAPI_LAYOUT_DEFAULT ||
1175 stripe_count == LLAPI_LAYOUT_WIDE ||
1176 (stripe_count != 0 && stripe_count != -1 &&
1177 llapi_stripe_count_is_valid(stripe_count));
/**
 * Check whether \a ext_size is an acceptable extension size.
 *
 * \param[in] ext_size	value to check
 *
 * \retval true		non-zero, aligned and not too big according to the
 *			llapi_stripe_size_* helpers
 * \retval false	anything else
 */
static bool llapi_layout_extension_size_is_valid(uint64_t ext_size)
{
	if (ext_size == 0)
		return false;

	return llapi_stripe_size_is_aligned(ext_size) &&
	       !llapi_stripe_size_is_too_big(ext_size);
}
1187 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
1189 return stripe_size == LLAPI_LAYOUT_DEFAULT ||
1190 (stripe_size != 0 &&
1191 llapi_stripe_size_is_aligned(stripe_size) &&
1192 !llapi_stripe_size_is_too_big(stripe_size));
1195 static bool llapi_layout_stripe_index_is_valid(int64_t stripe_index)
1197 return stripe_index == LLAPI_LAYOUT_DEFAULT ||
1198 (stripe_index >= 0 &&
1199 llapi_stripe_index_is_valid(stripe_index));
1203 * Set the stripe count of \a layout.
1205 * \param[in] layout layout to set stripe count in
1206 * \param[in] count value to be set
1208 * \retval 0 on success
1209 * \retval -1 if arguments are invalid
1211 int llapi_layout_stripe_count_set(struct llapi_layout *layout,
1214 struct llapi_layout_comp *comp;
1216 comp = __llapi_layout_cur_comp(layout);
1220 if (!llapi_layout_stripe_count_is_valid(count)) {
1225 comp->llc_stripe_count = count;
1231 * Get the stripe/extension size of \a layout.
1233 * \param[in] layout layout to get stripe size from
1234 * \param[out] size integer to store stripe size in
1235 * \param[in] extension flag if extenion size is requested
1237 * \retval 0 on success
1238 * \retval -1 if arguments are invalid
1240 static int layout_stripe_size_get(const struct llapi_layout *layout,
1241 uint64_t *size, bool extension)
1243 struct llapi_layout_comp *comp;
1246 comp = __llapi_layout_cur_comp(layout);
1255 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1256 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1261 *size = comp->llc_stripe_size;
1262 if (comp->llc_flags & LCME_FL_EXTENSION)
1263 *size *= SEL_UNIT_SIZE;
/**
 * Get the stripe size of the current component of \a layout.
 *
 * \param[in] layout	layout to get stripe size from
 * \param[out] size	integer to store stripe size in
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid or the current component is an
 *		extension component
 */
int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
				 uint64_t *size)
{
	return layout_stripe_size_get(layout, size, false);
}
/**
 * Get the extension size of the current component of \a layout.
 *
 * \param[in] layout	layout to get extension size from
 * \param[out] size	integer to store extension size in
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid or the current component is not
 *		an extension component
 */
int llapi_layout_extension_size_get(const struct llapi_layout *layout,
				    uint64_t *size)
{
	return layout_stripe_size_get(layout, size, true);
}
1281 * Set the stripe/extension size of \a layout.
1283 * \param[in] layout layout to set stripe size in
1284 * \param[in] size value to be set
1285 * \param[in] extension flag if extenion size is passed
1287 * \retval 0 on success
1288 * \retval -1 if arguments are invalid
1290 static int layout_stripe_size_set(struct llapi_layout *layout,
1291 uint64_t size, bool extension)
1293 struct llapi_layout_comp *comp;
1296 comp = __llapi_layout_cur_comp(layout);
1300 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1301 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1307 size /= SEL_UNIT_SIZE;
1309 if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) ||
1310 (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) {
1315 comp->llc_stripe_size = size;
/**
 * Set the stripe size of the current component of \a layout.
 *
 * \param[in] layout	layout to set stripe size in
 * \param[in] size	value to be set
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid or the current component is an
 *		extension component
 */
int llapi_layout_stripe_size_set(struct llapi_layout *layout,
				 uint64_t size)
{
	return layout_stripe_size_set(layout, size, false);
}
/**
 * Set the extension size of the current component of \a layout.
 *
 * \param[in] layout	layout to set extension size in
 * \param[in] size	value to be set
 *
 * \retval	0 on success
 * \retval	-1 if arguments are invalid or the current component is not
 *		an extension component
 */
int llapi_layout_extension_size_set(struct llapi_layout *layout,
				    uint64_t size)
{
	return layout_stripe_size_set(layout, size, true);
}
1332 * Get the RAID pattern of \a layout.
1334 * \param[in] layout layout to get pattern from
1335 * \param[out] pattern integer to store pattern in
1337 * \retval 0 on success
1338 * \retval -1 if arguments are invalid
1340 int llapi_layout_pattern_get(const struct llapi_layout *layout,
1343 struct llapi_layout_comp *comp;
1345 comp = __llapi_layout_cur_comp(layout);
1349 if (pattern == NULL) {
1354 *pattern = comp->llc_pattern;
1360 * Set the pattern of \a layout.
1362 * \param[in] layout layout to set pattern in
1363 * \param[in] pattern value to be set
1365 * \retval 0 on success
1366 * \retval -1 if arguments are invalid or RAID pattern
1369 int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
1371 struct llapi_layout_comp *comp;
1373 comp = __llapi_layout_cur_comp(layout);
1377 if (pattern != LLAPI_LAYOUT_DEFAULT &&
1378 pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
1379 && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
1384 comp->llc_pattern = pattern |
1385 (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC);
1390 static inline int stripe_number_roundup(int stripe_number)
1392 unsigned int round_up = (stripe_number + 8) & ~7;
1393 return round_up > LOV_MAX_STRIPE_COUNT ?
1394 LOV_MAX_STRIPE_COUNT : round_up;
1398 * Set the OST index of stripe number \a stripe_number to \a ost_index.
1400 * If only the starting stripe's OST index is specified, then this can use
1401 * the normal LOV_MAGIC_{V1,V3} layout type. If multiple OST indices are
1402 * given, then allocate an array to hold the list of indices and ensure that
1403 * the LOV_USER_MAGIC_SPECIFIC layout is used when creating the file.
1405 * \param[in] layout layout to set OST index in
1406 * \param[in] stripe_number stripe number to set index for
1407 * \param[in] ost_index the index to set
1409 * \retval 0 on success
1410 * \retval -1 if arguments are invalid or an unsupported stripe number
1411 * was specified, error returned in errno
1413 int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
1416 struct llapi_layout_comp *comp;
1418 comp = __llapi_layout_cur_comp(layout);
1422 if (!llapi_layout_stripe_index_is_valid(ost_index)) {
1427 if (stripe_number == 0 && ost_index == LLAPI_LAYOUT_DEFAULT) {
1428 comp->llc_stripe_offset = ost_index;
1429 comp->llc_pattern &= ~LLAPI_LAYOUT_SPECIFIC;
1430 __llapi_comp_objects_realloc(comp, 0);
1431 } else if (stripe_number >= 0 &&
1432 stripe_number < LOV_MAX_STRIPE_COUNT) {
1433 if (ost_index >= LLAPI_LAYOUT_IDX_MAX) {
1438 /* Preallocate a few more stripes to avoid realloc() overhead.*/
1439 if (__llapi_comp_objects_realloc(comp,
1440 stripe_number_roundup(stripe_number)) < 0)
1443 comp->llc_objects[stripe_number].l_ost_idx = ost_index;
1445 if (stripe_number == 0)
1446 comp->llc_stripe_offset = ost_index;
1448 comp->llc_pattern |= LLAPI_LAYOUT_SPECIFIC;
1450 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT ||
1451 comp->llc_stripe_count <= stripe_number)
1452 comp->llc_stripe_count = stripe_number + 1;
1462 * Get the OST index associated with stripe \a stripe_number.
1464 * Stripes are indexed starting from zero.
1466 * \param[in] layout layout to get index from
1467 * \param[in] stripe_number stripe number to get index for
1468 * \param[out] index integer to store index in
1470 * \retval 0 on success
1471 * \retval -1 if arguments are invalid
1473 int llapi_layout_ost_index_get(const struct llapi_layout *layout,
1474 uint64_t stripe_number, uint64_t *index)
1476 struct llapi_layout_comp *comp;
1478 comp = __llapi_layout_cur_comp(layout);
1482 if (index == NULL) {
1487 if (stripe_number >= comp->llc_stripe_count ||
1488 stripe_number >= comp->llc_objects_count) {
1493 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
1494 *index = LLAPI_LAYOUT_DEFAULT;
1496 *index = comp->llc_objects[stripe_number].l_ost_idx;
1503 * Get the pool name of layout \a layout.
1505 * \param[in] layout layout to get pool name from
1506 * \param[out] dest buffer to store pool name in
1507 * \param[in] n size in bytes of buffer \a dest
1509 * \retval 0 on success
1510 * \retval -1 if arguments are invalid
1512 int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
1515 struct llapi_layout_comp *comp;
1517 comp = __llapi_layout_cur_comp(layout);
1526 strncpy(dest, comp->llc_pool_name, n);
1532 * Set the name of the pool of layout \a layout.
1534 * \param[in] layout layout to set pool name in
1535 * \param[in] pool_name pool name to set
1537 * \retval 0 on success
1538 * \retval -1 if arguments are invalid or pool name is too long
1540 int llapi_layout_pool_name_set(struct llapi_layout *layout,
1541 const char *pool_name)
1543 struct llapi_layout_comp *comp;
1545 comp = __llapi_layout_cur_comp(layout);
1549 if (!llapi_pool_name_is_valid(&pool_name)) {
1554 strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name));
1559 * Open and possibly create a file with a given \a layout.
1561 * If \a layout is NULL this function acts as a simple wrapper for
1562 * open(). By convention, ENOTTY is returned in errno if \a path
1563 * refers to a non-Lustre file.
1565 * \param[in] path name of the file to open
1566 * \param[in] open_flags open() flags
1567 * \param[in] mode permissions to create file, filtered by umask
1568 * \param[in] layout layout to create new file with
1570 * \retval non-negative file descriptor on successful open
1571 * \retval -1 if an error occurred
/* Opens \a path and, when \a layout is non-NULL, applies the layout to the
 * opened file through the XATTR_LUSTRE_LOV extended attribute. */
1573 int llapi_layout_file_open(const char *path, int open_flags, mode_t mode,
1574 const struct llapi_layout *layout)
1579 struct lov_user_md *lum;
/* a non-NULL layout must carry LLAPI_LAYOUT_MAGIC */
1583 (layout != NULL && layout->llot_magic != LLAPI_LAYOUT_MAGIC)) {
/* sanity-check the layout; the last argument is true when the layout has
 * more than one mirror */
1589 rc = llapi_layout_sanity((struct llapi_layout *)layout, false,
1590 !!(layout->llot_mirror_count > 1));
1592 llapi_layout_sanity_perror(rc);
1597 /* Object creation must be postponed until after layout attributes
1598 * have been applied. */
1599 if (layout != NULL && (open_flags & O_CREAT))
1600 open_flags |= O_LOV_DELAY_CREATE;
1602 fd = open(path, open_flags, mode);
/* the remaining steps only apply to a successfully opened file that has
 * a layout to set */
1604 if (layout == NULL || fd < 0)
/* convert the layout into the lov_user_md blob handed to fsetxattr() */
1607 lum = llapi_layout_to_lum(layout);
/* the blob size depends on its magic: composite layouts record their own
 * size, SPECIFIC layouts are sized by stripe count, others by zero stripes */
1616 if (lum->lmm_magic == LOV_USER_MAGIC_COMP_V1)
1617 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
1618 else if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC)
1619 lum_size = lov_user_md_size(lum->lmm_stripe_count,
1622 lum_size = lov_user_md_size(0, lum->lmm_magic);
1624 rc = fsetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_size, 0);
/* map EOPNOTSUPP to ENOTTY; any other errno value is left as it is */
1633 errno = errno == EOPNOTSUPP ? ENOTTY : errno;
/**
 * Create a file with a given \a layout.
 *
 * Force O_CREAT and O_EXCL flags on so caller is assured that file was
 * created with the given \a layout on successful function return.
 *
 * \param[in] path		name of the file to open
 * \param[in] open_flags	open() flags
 * \param[in] mode		permissions to create new file with
 * \param[in] layout		layout to create new file with
 *
 * \retval		non-negative file descriptor on successful open
 * \retval		-1 if an error occurred
 */
int llapi_layout_file_create(const char *path, int open_flags, int mode,
			     const struct llapi_layout *layout)
{
	int create_flags = open_flags | O_CREAT | O_EXCL;

	return llapi_layout_file_open(path, create_flags, mode, layout);
}
1659 int llapi_layout_flags_get(struct llapi_layout *layout, uint32_t *flags)
1661 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1666 *flags = layout->llot_flags;
1671 * Set flags to the header of a component layout.
1673 int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
1675 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1680 layout->llot_flags = flags;
/* Maps the FLR state bits of \a flags to a string.
 * NOTE(review): the returned strings are on lines not visible here;
 * presumably they match the "ro"/"wp"/"sp" names parsed by
 * llapi_layout_string_flags() - confirm. */
1684 const char *llapi_layout_flags_string(uint32_t flags)
/* only the bits selected by LCM_FL_FLR_MASK take part in the lookup */
1686 switch (flags & LCM_FL_FLR_MASK) {
1689 case LCM_FL_WRITE_PENDING:
1691 case LCM_FL_SYNC_PENDING:
1698 __u16 llapi_layout_string_flags(char *string)
1700 if (strncmp(string, "ro", strlen(string)) == 0)
1701 return LCM_FL_RDONLY;
1702 if (strncmp(string, "wp", strlen(string)) == 0)
1703 return LCM_FL_WRITE_PENDING;
1704 if (strncmp(string, "sp", strlen(string)) == 0)
1705 return LCM_FL_SYNC_PENDING;
1711 * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
1712 * @count: Mirror count value to be checked.
1714 * This function checks the validity of mirror count.
1716 * Return: true on success or false on failure.
1718 static bool llapi_layout_mirror_count_is_valid(uint16_t count)
1720 return count >= 0 && count <= LUSTRE_MIRROR_COUNT_MAX;
1724 * llapi_layout_mirror_count_get() - Get mirror count from the header of
1726 * @layout: Layout to get mirror count from.
1727 * @count: Returned mirror count value.
1729 * This function gets mirror count from the header of a layout.
1731 * Return: 0 on success or -1 on failure.
1733 int llapi_layout_mirror_count_get(struct llapi_layout *layout,
1736 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1741 *count = layout->llot_mirror_count;
1746 * llapi_layout_mirror_count_set() - Set mirror count to the header of a layout.
1747 * @layout: Layout to set mirror count in.
1748 * @count: Mirror count value to be set.
1750 * This function sets mirror count to the header of a layout.
1752 * Return: 0 on success or -1 on failure.
1754 int llapi_layout_mirror_count_set(struct llapi_layout *layout,
1757 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1762 if (!llapi_layout_mirror_count_is_valid(count)) {
1767 layout->llot_mirror_count = count;
1772 * Fetch the start and end offset of the current layout component.
1774 * \param[in] layout the layout component
1775 * \param[out] start extent start, inclusive
1776 * \param[out] end extent end, exclusive
1778 * \retval 0 on success
1779 * \retval <0 if error occurs
1781 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
1782 uint64_t *start, uint64_t *end)
1784 struct llapi_layout_comp *comp;
1786 comp = __llapi_layout_cur_comp(layout);
1790 if (start == NULL || end == NULL) {
1795 *start = comp->llc_extent.e_start;
1796 *end = comp->llc_extent.e_end;
1802 * Set the layout extent of a layout.
1804 * \param[in] layout the layout to be set
1805 * \param[in] start extent start, inclusive
1806 * \param[in] end extent end, exclusive
1808 * \retval 0 on success
1809 * \retval <0 if error occurs
/* Stores [start, end) on the current component of \a layout.
 * NOTE(review): any validation of start/end sits on lines not visible
 * here - confirm before relying on a particular range check. */
1811 int llapi_layout_comp_extent_set(struct llapi_layout *layout,
1812 uint64_t start, uint64_t end)
1814 struct llapi_layout_comp *comp;
1816 comp = __llapi_layout_cur_comp(layout);
1825 comp->llc_extent.e_start = start;
1826 comp->llc_extent.e_end = end;
/* giving a component an extent marks the whole layout as composite */
1827 layout->llot_is_composite = true;
1833 * Gets the attribute flags of the current component.
1835 * \param[in] layout the layout component
1836 * \param[out] flags stored the returned component flags
1838 * \retval 0 on success
1839 * \retval <0 if error occurs
1841 int llapi_layout_comp_flags_get(const struct llapi_layout *layout,
1844 struct llapi_layout_comp *comp;
1846 comp = __llapi_layout_cur_comp(layout);
1850 if (flags == NULL) {
1855 *flags = comp->llc_flags;
1861 * Sets the specified flags of the current component leaving other flags as-is.
1863 * \param[in] layout the layout component
1864 * \param[in] flags component flags to be set
1866 * \retval 0 on success
1867 * \retval <0 if error occurs
1869 int llapi_layout_comp_flags_set(struct llapi_layout *layout, uint32_t flags)
1871 struct llapi_layout_comp *comp;
1873 comp = __llapi_layout_cur_comp(layout);
1877 comp->llc_flags |= flags;
1883 * Clears the flags specified in the flags leaving other flags as-is.
1885 * \param[in] layout the layout component
1886 * \param[in] flags component flags to be cleared
1888 * \retval 0 on success
1889 * \retval <0 if error occurs
1891 int llapi_layout_comp_flags_clear(struct llapi_layout *layout,
1894 struct llapi_layout_comp *comp;
1896 comp = __llapi_layout_cur_comp(layout);
1900 comp->llc_flags &= ~flags;
1906 * Fetches the file-unique component ID of the current layout component.
1908 * \param[in] layout the layout component
1909 * \param[out] id stored the returned component ID
1911 * \retval 0 on success
1912 * \retval <0 if error occurs
1914 int llapi_layout_comp_id_get(const struct llapi_layout *layout, uint32_t *id)
1916 struct llapi_layout_comp *comp;
1918 comp = __llapi_layout_cur_comp(layout);
1932 * Return the mirror id of the current layout component.
1934 * \param[in] layout the layout component
1935 * \param[out] id stored the returned mirror ID
1937 * \retval 0 on success
1938 * \retval <0 if error occurs
1940 int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id)
1942 struct llapi_layout_comp *comp;
1944 comp = __llapi_layout_cur_comp(layout);
1953 *id = mirror_id_of(comp->llc_id);
1959 * Adds a component to \a layout, the new component will be added to
1960 * the tail of components list and it'll inherit attributes of existing
1961 * ones. The \a layout will change it's current component pointer to
1962 * the newly added component, and it'll be turned into a composite
1963 * layout if it was not before the adding.
1965 * \param[in] layout existing composite or plain layout
1967 * \retval 0 on success
1968 * \retval <0 if error occurs
1970 int llapi_layout_comp_add(struct llapi_layout *layout)
1972 struct llapi_layout_comp *last, *comp, *new;
1973 bool composite = layout->llot_is_composite;
1975 comp = __llapi_layout_cur_comp(layout);
1979 new = __llapi_comp_alloc(0);
1983 last = list_entry(layout->llot_comp_list.prev, typeof(*last),
1986 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1988 /* We must mark the layout composite for the sanity check, but it may
1989 * not stay that way if the check fails */
1990 layout->llot_is_composite = true;
1991 layout->llot_cur_comp = new;
1993 /* We need to set a temporary non-zero value for "end" when we call
1994 * comp_extent_set, so we use LUSTRE_EOF-1, which is > all allowed
1995 * for the end of the previous component. (If we're adding this
1996 * component, the end of the previous component cannot be EOF.) */
1997 if (llapi_layout_comp_extent_set(layout, last->llc_extent.e_end,
1999 llapi_layout_comp_del(layout);
2000 layout->llot_is_composite = composite;
2007 * Adds a first component of a mirror to \a layout.
2008 * The \a layout will change it's current component pointer to
2009 * the newly added component, and it'll be turned into a composite
2010 * layout if it was not before the adding.
2012 * \param[in] layout existing composite or plain layout
2014 * \retval 0 on success
2015 * \retval <0 if error occurs
2017 int llapi_layout_add_first_comp(struct llapi_layout *layout)
2019 struct llapi_layout_comp *comp, *new;
2021 comp = __llapi_layout_cur_comp(layout);
2025 new = __llapi_comp_alloc(0);
2029 new->llc_extent.e_start = 0;
2031 list_add_tail(&new->llc_list, &layout->llot_comp_list);
2032 layout->llot_cur_comp = new;
2033 layout->llot_is_composite = true;
2039 * Deletes current component from the composite layout. The component
2040 * to be deleted must be the tail of components list, and it can't be
2041 * the only component in the layout.
2043 * \param[in] layout composite layout
2045 * \retval 0 on success
2046 * \retval <0 if error occurs
2048 int llapi_layout_comp_del(struct llapi_layout *layout)
2050 struct llapi_layout_comp *comp;
2052 comp = __llapi_layout_cur_comp(layout);
2056 if (!layout->llot_is_composite) {
2061 /* It must be the tail of the list (for PFL, can be relaxed
2062 * once we get mirrored components) */
2063 if (comp->llc_list.next != &layout->llot_comp_list) {
2067 layout->llot_cur_comp =
2068 list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
2069 if (comp->llc_list.prev == &layout->llot_comp_list)
2070 layout->llot_cur_comp = NULL;
2072 list_del_init(&comp->llc_list);
2073 __llapi_comp_free(comp);
2079 * Move the current component pointer to the component with
2080 * specified component ID.
2082 * \param[in] layout composite layout
2083 * \param[in] id component ID
2085 * \retval =0 : moved successfully
2086 * \retval <0 if error occurs
/* Makes the component whose ID equals \a comp_id the current component.
 * NOTE(review): the not-found path is on lines not visible here. */
2088 int llapi_layout_comp_use_id(struct llapi_layout *layout, uint32_t comp_id)
2090 struct llapi_layout_comp *comp;
2092 comp = __llapi_layout_cur_comp(layout);
2094 return -1; /* use previously set errno */
/* lookup by ID is only handled for composite layouts */
2096 if (!layout->llot_is_composite) {
/* LCME_ID_INVAL gets its own check before the search */
2101 if (comp_id == LCME_ID_INVAL) {
/* linear search of the component list for a matching ID */
2106 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
2107 if (comp->llc_id == comp_id) {
2108 layout->llot_cur_comp = comp;
2117 * Move the current component pointer to a specified position.
2119 * \param[in] layout composite layout
2120 * \param[in] pos the position to be moved, it can be:
2121 * LLAPI_LAYOUT_COMP_USE_FIRST: use first component
2122 * LLAPI_LAYOUT_COMP_USE_LAST: use last component
2123 * LLAPI_LAYOUT_COMP_USE_NEXT: use component after current
2124 * LLAPI_LAYOUT_COMP_USE_PREV: use component before current
2126 * \retval =0 : moved successfully
2127 * \retval =1 : at last component with NEXT, at first component with PREV
2128 * \retval <0 if error occurs
/* Moves the current component pointer of \a layout according to \a pos.
 * NOTE(review): the boundary checks for NEXT/PREV and all return
 * statements are on lines not visible here. */
2130 int llapi_layout_comp_use(struct llapi_layout *layout,
2131 enum llapi_layout_comp_use pos)
2133 struct llapi_layout_comp *comp, *head, *tail;
2135 comp = __llapi_layout_cur_comp(layout);
/* non-composite layouts: FIRST and LAST are tested separately from the
 * other positions */
2139 if (!layout->llot_is_composite) {
2140 if (pos == LLAPI_LAYOUT_COMP_USE_FIRST ||
2141 pos == LLAPI_LAYOUT_COMP_USE_LAST)
/* head and tail are the first and last entries of the component list */
2147 head = list_entry(layout->llot_comp_list.next, typeof(*head), llc_list);
2148 tail = list_entry(layout->llot_comp_list.prev, typeof(*tail), llc_list);
2150 case LLAPI_LAYOUT_COMP_USE_FIRST:
2151 layout->llot_cur_comp = head;
2153 case LLAPI_LAYOUT_COMP_USE_NEXT:
/* step forward from the current component */
2158 layout->llot_cur_comp = list_entry(comp->llc_list.next,
2159 typeof(*comp), llc_list);
2161 case LLAPI_LAYOUT_COMP_USE_LAST:
2162 layout->llot_cur_comp = tail;
2164 case LLAPI_LAYOUT_COMP_USE_PREV:
/* step backward from the current component */
2169 layout->llot_cur_comp = list_entry(comp->llc_list.prev,
2170 typeof(*comp), llc_list);
2181 * Add layout component(s) to an existing file.
2183 * \param[in] path The path name of the file
2184 * \param[in] layout The layout component(s) to be added
/* Validates that \a layout can be merged into the file's existing layout,
 * then submits \a layout through the XATTR_LUSTRE_LOV".add" xattr. */
2186 int llapi_layout_file_comp_add(const char *path,
2187 const struct llapi_layout *layout)
2189 int rc, fd = -1, lum_size, tmp_errno = 0;
2190 struct llapi_layout *existing_layout = NULL;
2191 struct lov_user_md *lum = NULL;
2193 if (path == NULL || layout == NULL ||
2194 layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
2199 fd = open(path, O_RDWR);
/* fetch the layout the file currently has */
2206 existing_layout = llapi_layout_get_by_fd(fd, 0);
2207 if (existing_layout == NULL) {
/* append the new components to the fetched copy and sanity-check the
 * combined result; the merged copy itself is not what gets sent */
2213 rc = llapi_layout_merge(&existing_layout, layout);
2220 rc = llapi_layout_sanity(existing_layout, false, false);
2223 llapi_layout_sanity_perror(rc);
/* only the components being added are converted and sent */
2228 lum = llapi_layout_to_lum(layout);
/* the lum is checked to be a composite (COMP_V1) one before its lcm_size
 * is read below */
2235 if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) {
2240 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2242 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0);
2252 llapi_layout_free(existing_layout);
2258 * Delete component(s) by the specified component id or component flags
2259 * from an existing file.
2261 * \param[in] path path name of the file
2262 * \param[in] id unique component ID
2263 * \param[in] flags flags: LCME_FL_* or;
2264 * negative flags: (LCME_FL_NEG|LCME_FL_*)
/* Builds a one-component request carrying \a flags, replays the deletion
 * on a copy of the file's layout for sanity checking, then submits the
 * request through the XATTR_LUSTRE_LOV".del" xattr.
 * NOTE(review): several statements (e.g. how \a id is stored in the
 * request) are on lines not visible here. */
2266 int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags)
2268 int rc = 0, fd = -1, lum_size, tmp_errno = 0;
2269 struct llapi_layout *layout;
2270 struct llapi_layout_comp *comp, *next;
2271 struct llapi_layout *existing_layout = NULL;
2272 struct lov_user_md *lum = NULL;
2274 if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) {
2279 /* Can only specify ID or flags, not both, not none. */
2280 if ((id != LCME_ID_INVAL && flags != 0) ||
2281 (id == LCME_ID_INVAL && flags == 0)) {
/* request layout: one component spanning [0, LUSTRE_EOF) */
2286 layout = llapi_layout_alloc();
2290 llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF);
2291 comp = __llapi_layout_cur_comp(layout);
2299 comp->llc_flags = flags;
2301 lum = llapi_layout_to_lum(layout);
2307 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2309 fd = open(path, O_RDWR);
2316 existing_layout = llapi_layout_get_by_fd(fd, 0);
2317 if (existing_layout == NULL) {
/* walk the existing components from the last one backwards: start with
 * USE_LAST while comp is NULL, then step with USE_PREV */
2325 while (rc == 0 && existing_layout->llot_cur_comp != NULL) {
2326 rc = llapi_layout_comp_use(existing_layout, comp ?
2327 LLAPI_LAYOUT_COMP_USE_PREV :
2328 LLAPI_LAYOUT_COMP_USE_LAST);
2333 comp = __llapi_layout_cur_comp(existing_layout);
/* three tests on the current component: ID mismatch, a negated-flag
 * request overlapping the component's flags, and a flag request with no
 * overlap; the branch bodies are not visible here */
2339 if (id != LCME_ID_INVAL && id != comp->llc_id)
2341 else if ((flags & LCME_FL_NEG) && (flags & comp->llc_flags))
2343 else if (flags && !(flags & comp->llc_flags))
2346 rc = llapi_layout_comp_del(existing_layout);
2347 /* the layout position is moved to previous one, adjust */
/* check that what remains after the simulated deletion is still sane */
2355 rc = llapi_layout_sanity(existing_layout, false, false);
2358 llapi_layout_sanity_perror(rc);
2363 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0);
2374 llapi_layout_free(layout);
2375 llapi_layout_free(existing_layout);
2381 /* Internal utility function to apply flags for sanity checking */
2382 static void llapi_layout_comp_apply_flags(struct llapi_layout_comp *comp,
2385 if (flags & LCME_FL_NEG)
2386 comp->llc_flags &= ~flags;
2388 comp->llc_flags |= flags;
/* Argument bundle handed to llapi_layout_apply_flags_cb() through the
 * iterator's opaque pointer.
 * NOTE(review): the remaining members are on lines not visible here. */
2391 struct llapi_layout_apply_flags_args {
/* flags to apply; entry i is paired with ID entry i by the callback */
2393 uint32_t *lfa_flags;
/* Iterator callback: applies every requested flag set whose ID matches the
 * current component of \a layout. */
2399 static int llapi_layout_apply_flags_cb(struct llapi_layout *layout,
2402 struct llapi_layout_apply_flags_args *args = arg;
2403 struct llapi_layout_comp *comp;
2406 comp = __llapi_layout_cur_comp(layout);
2409 return LLAPI_LAYOUT_ITER_STOP;
/* every request is examined; there is no early exit after a match */
2412 for (i = 0; i < args->lfa_count; i++) {
2413 if (comp->llc_id == args->lfa_ids[i])
2414 llapi_layout_comp_apply_flags(comp, args->lfa_flags[i]);
/* ask the iterator to continue with the next component */
2417 return LLAPI_LAYOUT_ITER_CONT;
2420 /* Apply flags to the layout for sanity checking */
/* Runs llapi_layout_apply_flags_cb() over every component of \a layout so
 * that the (ids[i], flags[i]) requests are reflected in the in-memory copy.
 * NOTE(review): return values are on lines not visible here. */
2421 static int llapi_layout_apply_flags(struct llapi_layout *layout, uint32_t *ids,
2422 uint32_t *flags, int count)
2424 struct llapi_layout_apply_flags_args args;
/* both arrays and a non-zero count are required */
2427 if (!ids || !flags || count == 0) {
2433 args.lfa_flags = flags;
2434 args.lfa_count = count;
2437 rc = llapi_layout_comp_iterate(layout,
2438 llapi_layout_apply_flags_cb,
/* errno == ENOENT is tested after the iteration */
2440 if (errno == ENOENT)
/* any iteration result other than ITER_CONT gets separate handling */
2443 if (rc != LLAPI_LAYOUT_ITER_CONT)
2449 * Change flags by component ID of components of an existing file.
2450 * The component to be modified is specified by the comp->lcme_id value,
2451 * which must be a unique component ID.
2453 * \param[in] path path name of the file
2454 * \param[in] ids An array of component IDs
2455 * \param[in] flags flags: LCME_FL_* or;
2456 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2457 * \param[in] count Number of elements in ids and flags array
/* Validates the (ids[i], flags[i]) requests, replays them on a copy of the
 * file's layout for sanity checking, then submits them through the
 * XATTR_LUSTRE_LOV".set.flags" xattr.
 * NOTE(review): error paths and cleanup are partly on lines not visible
 * here. */
2459 int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
2462 int rc = -1, fd = -1, i, tmp_errno = 0;
2464 struct llapi_layout *existing_layout = NULL;
2465 struct llapi_layout *layout = NULL;
2466 struct llapi_layout_comp *comp;
2467 struct lov_user_md *lum = NULL;
/* per-request validation: non-zero id and flags, id within LCME_ID_MAX,
 * only known flag bits, and no INIT bit */
2477 for (i = 0; i < count; i++) {
2478 if (!ids[i] || !flags[i]) {
2483 if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
2488 /* do not allow to set or clear INIT flag */
2489 if (flags[i] & LCME_FL_INIT) {
2495 fd = open(path, O_RDWR);
2502 existing_layout = llapi_layout_get_by_fd(fd, 0);
2503 if (existing_layout == NULL) {
/* apply the requests to the fetched copy and sanity-check the result */
2509 if (llapi_layout_apply_flags(existing_layout, ids, flags, count)) {
2515 rc = llapi_layout_sanity(existing_layout, false, false);
2518 llapi_layout_sanity_perror(rc);
/* request layout: one component per (id, flags) pair */
2523 layout = __llapi_layout_alloc();
2524 if (layout == NULL) {
2530 layout->llot_is_composite = true;
2531 for (i = 0; i < count; i++) {
2532 comp = __llapi_comp_alloc(0);
2539 comp->llc_id = ids[i];
2540 comp->llc_flags = flags[i];
2542 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
2543 layout->llot_cur_comp = comp;
2546 lum = llapi_layout_to_lum(layout);
2553 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2555 /* flush cached pages from clients */
2556 rc = llapi_file_flush(fd);
2563 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
2576 llapi_layout_free(existing_layout);
2577 llapi_layout_free(layout);
2583 * Check if the file layout is composite.
2585 * \param[in] layout the file layout to check
2587 * \retval true composite
2588 * \retval false not composite
2590 bool llapi_layout_is_composite(struct llapi_layout *layout)
2592 return layout->llot_is_composite;
2596 * Iterate every components in the @layout and call callback function @cb.
2598 * \param[in] layout component layout list.
2599 * \param[in] cb callback for each component
2600 * \param[in] cbdata callback data
2602 * \retval < 0 error happens during the iteration
2603 * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error
2604 * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration
/* Calls \a cb on each component of \a layout, starting from the first.
 * NOTE(review): the loop structure and error returns are on lines not
 * visible here. */
2606 int llapi_layout_comp_iterate(struct llapi_layout *layout,
2607 llapi_layout_iter_cb cb, void *cbdata)
2611 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2616 * make sure on success llapi_layout_comp_use() API returns 0 with
2622 rc = cb(layout, cbdata); /* the callback sees the current component */
2623 if (rc != LLAPI_LAYOUT_ITER_CONT) /* anything else ends the walk */
2626 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2629 else if (rc == 1) /* reached the last comp */
2630 return LLAPI_LAYOUT_ITER_CONT;
2637 * llapi_layout_merge() - Merge a composite layout into another one.
2638 * @dst_layout: Destination composite layout.
2639 * @src_layout: Source composite layout.
2641 * This function copies all of the components from @src_layout and
2642 * appends them to @dst_layout.
2644 * Return: 0 on success or -1 on failure.
2646 int llapi_layout_merge(struct llapi_layout **dst_layout,
2647 const struct llapi_layout *src_layout)
2649 struct llapi_layout *new_layout = *dst_layout;
2650 struct llapi_layout_comp *new = NULL;
2651 struct llapi_layout_comp *comp = NULL;
2654 if (src_layout == NULL ||
2655 list_empty((struct list_head *)&src_layout->llot_comp_list))
2658 if (new_layout == NULL) {
2659 new_layout = __llapi_layout_alloc();
2660 if (new_layout == NULL) {
2666 list_for_each_entry(comp, &src_layout->llot_comp_list, llc_list) {
2667 new = __llapi_comp_alloc(0);
2673 new->llc_pattern = comp->llc_pattern;
2674 new->llc_stripe_size = comp->llc_stripe_size;
2675 new->llc_stripe_count = comp->llc_stripe_count;
2676 new->llc_stripe_offset = comp->llc_stripe_offset;
2678 if (comp->llc_pool_name[0] != '\0')
2679 strncpy(new->llc_pool_name, comp->llc_pool_name,
2680 sizeof(new->llc_pool_name));
2682 for (i = 0; i < comp->llc_objects_count; i++) {
2683 if (__llapi_comp_objects_realloc(new,
2684 stripe_number_roundup(i)) < 0) {
2686 __llapi_comp_free(new);
2689 new->llc_objects[i].l_ost_idx = \
2690 comp->llc_objects[i].l_ost_idx;
2693 new->llc_objects_count = comp->llc_objects_count;
2694 new->llc_extent.e_start = comp->llc_extent.e_start;
2695 new->llc_extent.e_end = comp->llc_extent.e_end;
2696 new->llc_id = comp->llc_id;
2697 new->llc_flags = comp->llc_flags;
2699 list_add_tail(&new->llc_list, &new_layout->llot_comp_list);
2700 new_layout->llot_cur_comp = new;
2702 new_layout->llot_is_composite = true;
2704 *dst_layout = new_layout;
2707 llapi_layout_free(new_layout);
2712 * Get the last initialized component
2714 * \param[in] layout component layout list.
2717 * \retval -EINVAL not found
2718 * \retval -EISDIR directory layout
/* Points the current component of \a layout at the last component that has
 * LCME_FL_INIT set, searching from the tail of the list.
 * NOTE(review): some return statements are on lines not visible here. */
2720 int llapi_layout_get_last_init_comp(struct llapi_layout *layout)
2722 struct llapi_layout_comp *comp = NULL, *head = NULL;
2724 if (!layout->llot_is_composite)
/* head is the first component in the list */
2727 head = list_entry(layout->llot_comp_list.next, typeof(*comp), llc_list);
/* a first component with ID 0 and no INIT flag is handled separately */
2730 if (head->llc_id == 0 && !(head->llc_flags & LCME_FL_INIT))
2734 /* traverse the components from the tail to find the last init one */
2735 comp = list_entry(layout->llot_comp_list.prev, typeof(*comp), llc_list);
2736 while (comp != head) {
2737 if (comp->llc_flags & LCME_FL_INIT)
2739 comp = list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
/* comp is either an initialized component or has reached head */
2742 layout->llot_cur_comp = comp;
/* -EINVAL when the selected component is not initialized */
2744 return comp->llc_flags & LCME_FL_INIT ? 0 : -EINVAL;
2748 * Inherit stripe info from the file's component to the mirror
2750 * \param[in] f_layout file component layout list.
2751 * \param[in] m_layout mirror component layout list.
2753 * \retval 0 on success
2754 * \retval -EINVAL on error
/* Copies stripe settings from the current component of \a f_layout to the
 * current component of \a m_layout. */
2756 int llapi_layout_mirror_inherit(struct llapi_layout *f_layout,
2757 struct llapi_layout *m_layout)
2759 struct llapi_layout_comp *m_comp = NULL;
2760 struct llapi_layout_comp *f_comp = NULL;
/* f_comp: source (file) component; m_comp: destination (mirror) component */
2763 f_comp = __llapi_layout_cur_comp(f_layout);
2766 m_comp = __llapi_layout_cur_comp(m_layout);
2770 /* DoM component does not inherit stripe size */
2771 if (m_comp->llc_pattern != LLAPI_LAYOUT_MDT)
2772 m_comp->llc_stripe_size = f_comp->llc_stripe_size;
/* the stripe count is copied regardless of the pattern */
2773 m_comp->llc_stripe_count = f_comp->llc_stripe_count;
2779 * Find all stale components.
2781 * \param[in] layout component layout list.
2782 * \param[out] comp array of stale component info.
2783 * \param[in] comp_size array size of @comp.
2784 * \param[in] mirror_ids array of mirror id that only components
2785 * belonging to these mirror will be collected.
2786 * \param[in] ids_nr number of mirror ids array.
2788 * \retval number of component info collected on success or
2789 * an error code on failure.
2791 int llapi_mirror_find_stale(struct llapi_layout *layout,
2792 struct llapi_resync_comp *comp, size_t comp_size,
2793 __u16 *mirror_ids, int ids_nr)
2798 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2806 uint64_t start, end;
2808 rc = llapi_layout_comp_flags_get(layout, &flags);
2812 if (!(flags & LCME_FL_STALE))
2815 rc = llapi_layout_mirror_id_get(layout, &mirror_id);
2819 /* the caller only wants stale components from specific
2824 for (j = 0; j < ids_nr; j++) {
2825 if (mirror_ids[j] == mirror_id)
2829 /* not in the specified mirror */
2832 } else if (flags & LCME_FL_NOSYNC) {
2833 /* if not specified mirrors, do not resync "nosync"
2838 rc = llapi_layout_comp_id_get(layout, &id);
2842 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2846 /* pack this component into @comp array */
2847 comp[idx].lrc_id = id;
2848 comp[idx].lrc_mirror_id = mirror_id;
2849 comp[idx].lrc_start = start;
2850 comp[idx].lrc_end = end;
2853 if (idx >= comp_size) {
2859 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2866 return rc < 0 ? rc : idx;
2869 /* locate @layout to a valid component covering file [file_start, file_end) */
/* Walks the components of \a layout looking for non-stale ones that cover
 * the file range starting at \a file_start; \a endp tracks the end of the
 * range covered so far.
 * NOTE(review): loop control and return values are on lines not visible
 * here. */
2870 int llapi_mirror_find(struct llapi_layout *layout, uint64_t file_start,
2871 uint64_t file_end, uint64_t *endp)
2873 uint32_t mirror_id = 0;
2876 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2882 uint64_t start, end;
2883 uint32_t flags, id, rid;
2885 rc = llapi_layout_comp_flags_get(layout, &flags);
/* stale components get separate handling */
2889 if (flags & LCME_FL_STALE)
/* rid: mirror ID of the component under inspection */
2892 rc = llapi_layout_mirror_id_get(layout, &rid);
2896 rc = llapi_layout_comp_id_get(layout, &id);
2900 rc = llapi_layout_comp_extent_get(layout, &start, &end);
/* the component contains the current start offset */
2904 if (file_start >= start && file_start < end) {
/* a different mirror, or a gap since the previous end */
2907 else if (mirror_id != rid || *endp != start)
/* advance the covered range to the end of this component */
2910 file_start = *endp = end;
2911 if (end >= file_end)
2916 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
/**
 * Resynchronize the components listed in \a comp_array by copying the range
 * starting at \a start (bounded by \a end) from a source mirror.
 *
 * What the code below does, step by step:
 *  - allocates a page-aligned 4MiB bounce buffer with posix_memalign();
 *  - when the current position passes the end of the current source mirror
 *    (or no source is selected yet) it asks llapi_mirror_find() for one and
 *    clamps the mirror end to \a end;
 *  - uses llapi_mirror_data_seek() to locate the next data range; the
 *    "switch to full copy" branch reads up to the mirror end instead
 *    (presumably when the seek cannot be used);
 *  - if there is a hole before the data (pos < data_off), every overlapping
 *    target component is truncated (last component, lrc_end ==
 *    OBD_OBJECT_EOF) or punched instead of being written with zeros;
 *  - otherwise it reads with llapi_mirror_read() and writes the overlapping
 *    part of the buffer to each target with llapi_mirror_write(); read and
 *    write lengths are rounded up to whole pages for direct IO.
 *
 * During the copy lrc_synced is used with inverted meaning: it is set to
 * true for a component whose write failed.  On a fatal error every
 * lrc_synced is cleared; otherwise each flag is inverted so that true means
 * "resynced".  A component that was synced is then truncated to the final
 * position when that position is not page aligned; a truncate failure clears
 * lrc_synced again unless it is -ENOKEY at an encryption unit boundary.
 *
 * \param[in]     fd          descriptor handed to the llapi_mirror_* helpers
 * \param[in]     layout      layout searched by llapi_mirror_find()
 * \param[in,out] comp_array  target components; lrc_synced is updated
 * \param[in]     comp_size   number of entries in \a comp_array
 * \param[in]     start       offset where the copy begins
 * \param[in]     end         offset that bounds the copy
 *
 * NOTE(review): the return statements are not part of the lines below; the
 * trailing comment says the first error code is returned for a partially
 * successful resync - confirm against the complete function.
 */
2926 int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
2927 struct llapi_resync_comp *comp_array,
2928 int comp_size, uint64_t start, uint64_t end)
2930 size_t page_size = sysconf(_SC_PAGESIZE);
2931 const size_t buflen = 4 << 20; /* 4M */
2933 uint64_t pos = start;
2934 uint64_t data_off = pos, data_end = pos;
2940 rc = posix_memalign(&buf, page_size, buflen);
2945 uint64_t mirror_end = LUSTRE_EOF;
2950 if (pos >= data_end) {
2954 if (pos >= mirror_end || !src) {
2955 rc = llapi_mirror_find(layout, pos, end,
2960 /* restrict mirror end by resync end */
2961 mirror_end = MIN(end, mirror_end);
2964 tmp_off = llapi_mirror_data_seek(fd, src, pos,
2967 /* switch to full copy */
2968 to_read = mirror_end - pos;
2972 data_end = data_off + data_size;
2974 data_off = MIN(data_off, mirror_end);
2975 data_end = MIN(data_end, mirror_end);
2977 /* align by page, if there is data block to copy */
2979 data_off &= ~(page_size - 1);
/* [pos, data_off) holds no data in the source: punch or truncate that range
 * in every overlapping target component instead of copying it */
2982 if (pos < data_off) {
2983 for (i = 0; i < comp_size; i++) {
2986 uint32_t mid = comp_array[i].lrc_mirror_id;
2988 /* skip non-overlapped component */
2989 if (pos >= comp_array[i].lrc_end ||
2990 data_off <= comp_array[i].lrc_start)
2993 if (pos < comp_array[i].lrc_start)
2994 cur_pos = comp_array[i].lrc_start;
2998 if (data_off > comp_array[i].lrc_end)
2999 to_punch = comp_array[i].lrc_end -
3002 to_punch = data_off - cur_pos;
3004 if (comp_array[i].lrc_end == OBD_OBJECT_EOF) {
3005 /* the last component can be truncated
3008 rc = llapi_mirror_truncate(fd, mid,
3010 /* hole at the end of file, so just
3011 * truncate up to set size.
3013 if (!rc && data_off == data_end)
3014 rc = llapi_mirror_truncate(fd,
3017 rc = llapi_mirror_punch(fd,
3018 comp_array[i].lrc_mirror_id,
3021 /* if failed then read failed hole range */
3025 if (pos + to_punch == data_off)
3026 to_read = data_end - pos;
3034 if (pos == mirror_end)
3036 to_read = data_end - pos;
3041 assert(data_end <= mirror_end);
3043 to_read = MIN(buflen, to_read);
/* round the read length up to a whole number of pages; the bit trick
 * assumes page_size is a power of two */
3044 to_read = ((to_read - 1) | (page_size - 1)) + 1;
3045 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
3046 if (bytes_read == 0) {
3050 if (bytes_read < 0) {
3055 /* round up to page align to make direct IO happy. */
3056 to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
3058 for (i = 0; i < comp_size; i++) {
3061 size_t to_write2 = to_write;
3063 /* skip non-overlapped component */
3064 if (pos >= comp_array[i].lrc_end ||
3065 pos + to_write <= comp_array[i].lrc_start)
3068 if (pos < comp_array[i].lrc_start)
3069 pos2 = comp_array[i].lrc_start;
3071 to_write2 -= pos2 - pos;
3073 if ((pos + to_write) > comp_array[i].lrc_end)
3074 to_write2 -= pos + to_write -
3075 comp_array[i].lrc_end;
3077 written = llapi_mirror_write(fd,
3078 comp_array[i].lrc_mirror_id,
3083 * this component is not written successfully,
3084 * mark it using its lrc_synced, it is supposed
3085 * to be false before getting here.
3087 * And before this function returns, all
3088 * elements of comp_array will reverse their
3089 * lrc_synced flag to reflect their true
3092 comp_array[i].lrc_synced = true;
3093 llapi_error(LLAPI_MSG_ERROR, written,
3094 "component %u not synced",
3095 comp_array[i].lrc_id);
3100 assert(written == to_write2);
3108 /* fatal error happens */
3109 for (i = 0; i < comp_size; i++)
3110 comp_array[i].lrc_synced = false;
3115 * no fatal error happens, each lrc_synced tells whether the component
3116 * has been resync successfully (note: we'd reverse the value to
3117 * reflect its true meaning.
3119 for (i = 0; i < comp_size; i++) {
3120 comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
3121 if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
3122 rc = llapi_mirror_truncate(fd,
3123 comp_array[i].lrc_mirror_id, pos);
3124 /* Ignore truncate error on encrypted file without the
3125 * key if tried on LUSTRE_ENCRYPTION_UNIT_SIZE boundary.
3127 if (rc < 0 && (rc != -ENOKEY ||
3128 pos & ~LUSTRE_ENCRYPTION_MASK))
3129 comp_array[i].lrc_synced = false;
3134 * returns the first error code for partially successful resync if
/**
 * Result codes of the layout sanity checks.
 *
 * llapi_layout_sanity_cb() stores one of these in lsa_rc when a check fails,
 * and the values are used as designated indexes into
 * llapi_layout_strerror[], so every code needs a matching message there.
 */
3140 enum llapi_layout_comp_sanity_error {
3142 LSE_INCOMPLETE_MIRROR,
3143 LSE_ADJACENT_EXTENSION,
3147 LSE_DOM_EXTENSION_FOLLOWING,
3150 LSE_NOT_ZERO_LENGTH_EXTENDABLE,
3151 LSE_END_NOT_GREATER,
3152 LSE_ZERO_LENGTH_NORMAL,
3153 LSE_NOT_ADJACENT_PREV,
/**
 * Human readable explanation for each llapi_layout_comp_sanity_error code,
 * indexed by the code itself.  llapi_layout_sanity_perror() prints these
 * after "Invalid layout: ".
 *
 * NOTE(review): the LSE_NOT_ADJACENT_PREV message opens a parenthesis that
 * it never closes ("(end != next->start"); this is user-visible text.
 */
3160 const char *const llapi_layout_strerror[] =
3163 [LSE_INCOMPLETE_MIRROR] =
3164 "Incomplete mirror - must go to EOF",
3165 [LSE_ADJACENT_EXTENSION] =
3166 "No adjacent extension space components",
3167 [LSE_INIT_EXTENSION] =
3168 "Cannot apply extension flag to init components",
3171 [LSE_DOM_EXTENSION] =
3172 "DoM components can't be extension space",
3173 [LSE_DOM_EXTENSION_FOLLOWING] =
3174 "DoM components cannot be followed by extension space",
3176 "DoM component should be the first one in a file/mirror",
3177 [LSE_SET_COMP_START] =
3178 "Must set previous component extent before adding next",
3179 [LSE_NOT_ZERO_LENGTH_EXTENDABLE] =
3180 "Extendable component must start out zero-length",
3181 [LSE_END_NOT_GREATER] =
3182 "Component end is before end of previous component",
3183 [LSE_ZERO_LENGTH_NORMAL] =
3184 "Zero length components must be followed by extension",
3185 [LSE_NOT_ADJACENT_PREV] =
3186 "Components not adjacent (end != next->start",
3187 [LSE_START_GT_END] =
3188 "Component start is > end",
3190 "The component end must be aligned by the stripe size",
3192 "The extension size must be aligned by the stripe size",
/**
 * State shared between llapi_layout_sanity() and its per-component callback
 * llapi_layout_sanity_cb(), which receives it through the iterator argument.
 * lsa_incomplete is copied from the caller's "incomplete" argument and
 * switches off the checks that only make sense on a finished layout.
 */
3195 struct llapi_layout_sanity_args {
3196 bool lsa_incomplete;
3202 /* Mask of the component flags that users may set at creation/modification
 * time.  llapi_layout_sanity_cb() reports LSE_FLAGS when a component that is
 * not yet on disk, in a complete FLR layout, carries any flag outside it. */
3203 #define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
3207 * When modified, adjust llapi_stripe_param_verify() if needed as well.
/**
 * Per-component sanity check, used as the callback that
 * llapi_layout_sanity() passes to llapi_layout_comp_iterate().
 *
 * The current component of \a layout is compared with its neighbours on
 * llot_comp_list.  The checks fall into three groups: flag checks (extension
 * and user-settable flags), DoM checks, and extent checks (ordering,
 * adjacency, zero length, stripe-size alignment).  The first failing check
 * records its LSE_* code in args->lsa_rc.
 *
 * \param[in]     layout  layout whose current component is examined
 * \param[in,out] arg     struct llapi_layout_sanity_args; lsa_incomplete and
 *                        lsa_flr select which checks run, lsa_rc receives
 *                        the error code
 *
 * \retval LLAPI_LAYOUT_ITER_CONT  the component passed every check
 * \retval LLAPI_LAYOUT_ITER_STOP  a check failed; errno is left as is when
 *                                 already non-zero, otherwise set to EINVAL
 */
3209 static int llapi_layout_sanity_cb(struct llapi_layout *layout,
3212 struct llapi_layout_comp *comp, *next, *prev;
3213 struct llapi_layout_sanity_args *args = arg;
3214 bool first_comp = false;
3216 comp = __llapi_layout_cur_comp(layout);
3222 if (comp->llc_list.prev != &layout->llot_comp_list)
3223 prev = list_entry(comp->llc_list.prev, typeof(*prev),
3228 if (comp->llc_list.next != &layout->llot_comp_list)
3229 next = list_entry(comp->llc_list.next, typeof(*next),
3234 /* Start of zero implies a new mirror */
3235 if (comp->llc_extent.e_start == 0) {
3237 /* Most checks apply only within one mirror, this is an
3239 if (prev && prev->llc_extent.e_end != LUSTRE_EOF) {
3240 args->lsa_rc = LSE_INCOMPLETE_MIRROR;
3247 if (next && next->llc_extent.e_start == 0)
3250 /* Flag sanity checks */
3251 /* No adjacent extension components */
3252 if ((comp->llc_flags & LCME_FL_EXTENSION) && next &&
3253 (next->llc_flags & LCME_FL_EXTENSION)) {
3254 args->lsa_rc = LSE_ADJACENT_EXTENSION;
3258 /* Extension flag cannot be applied to init components and the first
3259 * component of each mirror is automatically init */
3260 if ((comp->llc_flags & LCME_FL_EXTENSION) &&
3261 (comp->llc_flags & LCME_FL_INIT || first_comp)) {
3262 args->lsa_rc = LSE_INIT_EXTENSION;
3266 if (comp->llc_ondisk) {
3267 if (comp->llc_flags & LCME_FL_NEG)
3268 args->lsa_rc = LSE_FLAGS;
3269 } else if (!args->lsa_incomplete) {
3270 if (args->lsa_flr) {
3271 if (comp->llc_flags & ~LCME_USER_COMP_FLAGS)
3272 args->lsa_rc = LSE_FLAGS;
3274 if (comp->llc_flags &
3275 ~(LCME_FL_EXTENSION | LCME_FL_PREF_RW))
3276 args->lsa_rc = LSE_FLAGS;
3282 /* DoM sanity checks */
3283 if (comp->llc_pattern == LLAPI_LAYOUT_MDT ||
3284 comp->llc_pattern == LOV_PATTERN_MDT) {
3285 /* DoM components can't be extension components */
3286 if (comp->llc_flags & LCME_FL_EXTENSION) {
3287 args->lsa_rc = LSE_DOM_EXTENSION;
3290 /* DoM components cannot be followed by an extension comp */
3291 if (next && (next->llc_flags & LCME_FL_EXTENSION)) {
3292 args->lsa_rc = LSE_DOM_EXTENSION_FOLLOWING;
3296 /* DoM should be the first component in a mirror */
3298 args->lsa_rc = LSE_DOM_FIRST;
3304 /* Extent sanity checks */
3305 /* Must set previous component extent before adding another */
3306 if (prev && prev->llc_extent.e_start == 0 &&
3307 prev->llc_extent.e_end == 0) {
3308 args->lsa_rc = LSE_SET_COMP_START;
3312 if (!args->lsa_incomplete) {
3313 /* Components followed by extension space (extendable
3314 * components) must be zero length before initialization.
3315 * (Except for first comp, which will be initialized on
3317 if (next && (next->llc_flags & LCME_FL_EXTENSION) &&
3318 !first_comp && !(comp->llc_flags & LCME_FL_INIT) &&
3319 comp->llc_extent.e_start != comp->llc_extent.e_end) {
3320 args->lsa_rc = LSE_NOT_ZERO_LENGTH_EXTENDABLE;
3324 /* End must come after end of previous comp */
3325 if (prev && comp->llc_extent.e_end < prev->llc_extent.e_end) {
3326 args->lsa_rc = LSE_END_NOT_GREATER;
3330 /* Components not followed by ext space must have length > 0. */
3331 if (comp->llc_extent.e_start == comp->llc_extent.e_end &&
3332 (next == NULL || !(next->llc_flags & LCME_FL_EXTENSION))) {
3333 args->lsa_rc = LSE_ZERO_LENGTH_NORMAL;
3337 /* The component end must be aligned by the stripe size */
/* NOTE(review): unlike the neighbouring tests, prev is dereferenced below
 * without a NULL check.  This appears to rely on the LSE_INIT_EXTENSION
 * check above having rejected an extension component that starts a mirror;
 * confirm that an extension component which is first on the list but has a
 * non-zero start cannot reach this point. */
3338 if ((comp->llc_flags & LCME_FL_EXTENSION) &&
3339 (prev->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3340 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3341 comp->llc_extent.e_end % prev->llc_stripe_size) {
3342 args->lsa_rc = LSE_ALIGN_END;
3345 if ((comp->llc_stripe_size * SEL_UNIT_SIZE) %
3346 prev->llc_stripe_size) {
3347 args->lsa_rc = LSE_ALIGN_EXT;
3350 } else if (!(comp->llc_flags & LCME_FL_EXTENSION) &&
3351 (comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3352 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3353 comp->llc_extent.e_end !=
3354 comp->llc_extent.e_start &&
3355 comp->llc_extent.e_end % comp->llc_stripe_size) {
3356 args->lsa_rc = LSE_ALIGN_END;
3362 /* Components must have start == prev->end */
3363 if (prev && comp->llc_extent.e_start != 0 &&
3364 comp->llc_extent.e_start != prev->llc_extent.e_end) {
3365 args->lsa_rc = LSE_NOT_ADJACENT_PREV;
3369 /* Components must have start <= end */
3370 if (comp->llc_extent.e_start > comp->llc_extent.e_end) {
3371 args->lsa_rc = LSE_START_GT_END;
3375 return LLAPI_LAYOUT_ITER_CONT;
3378 errno = errno ? errno : EINVAL;
3379 return LLAPI_LAYOUT_ITER_STOP;
3382 /* Print an explanation of the layout sanity error code \a error to stdout.
 * A code outside [0, LSE_LAST) is reported by number as unrecognized; any
 * other code is translated through llapi_layout_strerror[]. */
3383 void llapi_layout_sanity_perror(int error)
3385 if (error >= LSE_LAST || error < 0) {
3386 fprintf(stdout, "Invalid layout, unrecognized error: %d\n",
3389 fprintf(stdout, "Invalid layout: %s\n",
3390 llapi_layout_strerror[error]);
3394 /* Walk a layout and enforce sanity checks that apply to > 1 component
3396 * The core idea here is that of sanity checking individual tokens vs semantic
3398 * We cannot check everything at the individual component level ('token'),
3399 * instead we must check whether or not the full layout has a valid meaning.
3401 * An example of a component level check is "is stripe size valid?". That is
3402 * handled when setting stripe size.
3404 * An example of a layout level check is "are the extents of these components
3405 * valid when adjacent to one another", or "can we set these flags on adjacent
3408 * \param[in] layout component layout list.
3409 * \param[in] fname file the layout to be checked for
3410 * \param[in] incomplete if layout is complete or not - some checks can
3411 * only be done on complete layouts.
3412 * \param[in] flr set when this is called from FLR mirror create
3414 * \retval 0, success, positive: various errors, see
3415 * llapi_layout_sanity_perror, -1, failure
3417 int llapi_layout_sanity(struct llapi_layout *layout,
3421 struct llapi_layout_sanity_args args = { 0 };
3422 struct llapi_layout_comp *curr;
/* remember the caller's current component; it is put back once the
 * checks are done (see the final assignment) */
3428 curr = layout->llot_cur_comp;
3435 args.lsa_incomplete = incomplete;
3437 /* When we modify an existing layout, this tells us if it's FLR */
3438 if (mirror_id_of(curr->llc_id) > 0)
3439 args.lsa_flr = true;
/* run llapi_layout_sanity_cb() on the components; it reports a failed
 * check through args and by stopping the iteration */
3442 rc = llapi_layout_comp_iterate(layout,
3443 llapi_layout_sanity_cb,
3445 if (errno == ENOENT)
3448 if (rc != LLAPI_LAYOUT_ITER_CONT)
/* restore the current component saved on entry */
3451 layout->llot_cur_comp = curr;
/**
 * Report the extent end of the leading Data-on-MDT component of a
 * composite layout.
 *
 * The function rejects a NULL or non-composite \a layout, selects the first
 * component, checks that its pattern is LOV_PATTERN_MDT or LLAPI_LAYOUT_MDT
 * and then stores that component's extent end in \a size.
 *
 * \param[in]  layout  composite layout to inspect; its current component is
 *                     moved to the first one (no restore is visible here -
 *                     TODO confirm whether callers depend on that)
 * \param[out] size    receives the end offset of the first component
 *
 * NOTE(review): the values returned on each path are not part of the lines
 * below; confirm them against the complete function.
 */
3456 int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size)
3458 uint64_t pattern, start;
3461 if (!layout || !llapi_layout_is_composite(layout)) {
3466 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
3470 rc = llapi_layout_pattern_get(layout, &pattern);
3474 if (pattern != LOV_PATTERN_MDT && pattern != LLAPI_LAYOUT_MDT) {
3479 rc = llapi_layout_comp_extent_get(layout, &start, size);
/**
 * Return the size of the layout blob that \a lcm points to, chosen by its
 * magic number.
 *
 * The buffer is declared as a composite layout but may hold other formats:
 *  - LOV_MAGIC_V1 / LOV_MAGIC_V3: reinterpreted as struct lov_user_md and
 *    sized with lov_user_md_size() from its stripe count and magic;
 *  - LOV_MAGIC_FOREIGN: reinterpreted as struct lov_foreign_md, returns
 *    lfm_length;
 *  - LOV_MAGIC_COMP_V1: returns lcm_size.
 * Any other magic takes the early-exit branch of the last test, whose result
 * is not shown in the lines below (presumably a negative errno - confirm).
 *
 * \param[in] lcm  layout buffer; must not be NULL, it is dereferenced
 *                 unconditionally
 */
3487 int lov_comp_md_size(struct lov_comp_md_v1 *lcm)
3489 if (lcm->lcm_magic == LOV_MAGIC_V1 || lcm->lcm_magic == LOV_MAGIC_V3) {
3490 struct lov_user_md *lum = (void *)lcm;
3492 return lov_user_md_size(lum->lmm_stripe_count, lum->lmm_magic);
3495 if (lcm->lcm_magic == LOV_MAGIC_FOREIGN) {
3496 struct lov_foreign_md *lfm = (void *)lcm;
3498 return lfm->lfm_length;
3501 if (lcm->lcm_magic != LOV_MAGIC_COMP_V1)
3504 return lcm->lcm_size;
/**
 * Fetch flags, statx data and the striping (LOV user MD) of a file through
 * get_lmd_info_fd(), using an on-stack buffer of 64KiB plus the
 * lov_user_mds_data header.
 *
 * \param[in]  dir_fd   directory descriptor
 * \param[in]  fname    file name; per the comment in the body it is relative
 *                      to the parent directory when provided
 * \param[out] valid    receives lmd_flags from the reply
 * \param[out] statx    receives a copy of lmd_stx
 * \param[out] lum      receives lmd_lmmsize bytes of layout data
 * \param[in]  lumsize  size of the \a lum buffer; a non-NULL \a lum smaller
 *                      than sizeof(*lum) is rejected up front, and a reply
 *                      larger than \a lumsize is tested before the copy
 *
 * NOTE(review): whether \a valid, \a statx and \a lum may each be NULL, and
 * the values returned on each path, are decided on lines not shown below -
 * confirm against the complete function.
 */
3507 int llapi_get_lum_file_fd(int dir_fd, const char *fname, __u64 *valid,
3508 lstatx_t *statx, struct lov_user_md *lum,
3511 struct lov_user_mds_data *lmd;
3512 char buf[65536 + offsetof(typeof(*lmd), lmd_lmm)];
3516 if (lum && lumsize < sizeof(*lum))
3519 /* If a file name is provided, it is relative to the parent directory */
3525 lmd = (struct lov_user_mds_data *)buf;
3526 rc = get_lmd_info_fd(fname, parent_fd, dir_fd, buf, sizeof(buf),
3532 *valid = lmd->lmd_flags;
3535 memcpy(statx, &lmd->lmd_stx, sizeof(*statx));
3538 if (lmd->lmd_lmmsize > lumsize)
3540 memcpy(lum, &lmd->lmd_lmm, lmd->lmd_lmmsize);
/**
 * Directory variant of llapi_get_lum_file_fd(): queries the directory that
 * \a dir_fd refers to by passing a NULL file name.  All other arguments and
 * the return value are forwarded unchanged.
 */
3546 int llapi_get_lum_dir_fd(int dir_fd, __u64 *valid, lstatx_t *statx,
3547 struct lov_user_md *lum, size_t lumsize)
3549 return llapi_get_lum_file_fd(dir_fd, NULL, valid, statx, lum, lumsize);
/**
 * Path based front end for llapi_get_lum_file_fd().
 *
 * \a path is split at its last '/': the part before it is copied into
 * parent[] and opened read-only, the part after it is passed on as the file
 * name.  Without any '/' the parent is ".".  A failure to open the parent is
 * logged with llapi_error().
 *
 * \param[in]  path     file to query
 * \param[out] valid, statx, lum, lumsize
 *                      forwarded to llapi_get_lum_file_fd()
 *
 * NOTE(review): the length given to strncpy(parent, path, tmp - path) and
 * the index used for the terminator are not visibly checked against
 * sizeof(parent); confirm that callers cannot pass a directory part of
 * PATH_MAX bytes or more.
 * NOTE(review): when the only '/' is the leading one (e.g. "/name"),
 * tmp == path, so parent becomes the empty string rather than "/"; verify
 * that open() on it behaves as intended.
 */
3552 int llapi_get_lum_file(const char *path, __u64 *valid, lstatx_t *statx,
3553 struct lov_user_md *lum, size_t lumsize)
3555 char parent[PATH_MAX];
3562 tmp = strrchr(path, '/');
3564 strncpy(parent, ".", sizeof(parent) - 1);
3567 strncpy(parent, path, tmp - path);
3568 offset = tmp - path - 1;
3569 parent[tmp - path] = 0;
3574 fname += offset + 2;
3576 dir_fd = open(parent, O_RDONLY);
3579 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3583 rc = llapi_get_lum_file_fd(dir_fd, fname, valid, statx, lum, lumsize);
3588 int llapi_get_lum_dir(const char *path, __u64 *valid, lstatx_t *statx,
3589 struct lov_user_md *lum, size_t lumsize)
3594 dir_fd = open(path, O_RDONLY);
3597 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3601 rc = llapi_get_lum_dir_fd(dir_fd, valid, statx, lum, lumsize);