4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the GNU Lesser General Public License
8 * (LGPL) version 2.1 or (at your discretion) any later version.
9 * (LGPL) version 2.1 accompanies this distribution, and is available at
10 * http://www.gnu.org/licenses/lgpl-2.1.html
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
20 * lustre/utils/liblustreapi_layout.c
22 * lustreapi library for layout calls for interacting with the layout of
23 * Lustre files while hiding details of the internal data structures
26 * Copyright (c) 2016, 2017, Intel Corporation.
28 * Author: Ned Bass <bass6@llnl.gov>
38 #include <sys/xattr.h>
39 #include <sys/param.h>
41 #include <libcfs/util/list.h>
42 #include <lustre/lustreapi.h>
43 #include "lustreapi_internal.h"
46 * Layout component, which contains all attributes of a plain
49 struct llapi_layout_comp {
51 uint64_t llc_stripe_size;
52 uint64_t llc_stripe_count;
53 uint64_t llc_stripe_offset;
54 /* Add 1 so user always gets back a null terminated string. */
55 char llc_pool_name[LOV_MAXPOOLNAME + 1];
56 /** Number of objects in llc_objects array if was initialized. */
57 uint32_t llc_objects_count;
58 struct lov_user_ost_data_v1 *llc_objects;
59 /* fields used only for composite layouts */
60 struct lu_extent llc_extent; /* [start, end) of component */
61 uint32_t llc_id; /* unique ID of component */
62 uint32_t llc_flags; /* LCME_FL_* flags */
63 uint64_t llc_timestamp; /* snapshot timestamp */
64 struct list_head llc_list; /* linked to the llapi_layout
70 * An Opaque data type abstracting the layout of a Lustre file.
73 uint32_t llot_magic; /* LLAPI_LAYOUT_MAGIC */
76 bool llot_is_composite;
77 uint16_t llot_mirror_count;
78 /* Cursor pointing to one of the components in llot_comp_list */
79 struct llapi_layout_comp *llot_cur_comp;
80 struct list_head llot_comp_list;
84 * Compute the number of elements in the lmm_objects array of \a lum
85 * with size \a lum_size.
87 * \param[in] lum the struct lov_user_md to check
88 * \param[in] lum_size the number of bytes in \a lum
90 * \retval number of elements in array lum->lmm_objects
92 static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
97 if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
100 if (lum->lmm_magic == __swab32(LOV_MAGIC_V1) ||
101 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
102 magic = __swab32(lum->lmm_magic);
104 magic = lum->lmm_magic;
106 base_size = lov_user_md_size(0, magic);
108 if (lum_size <= base_size)
111 return (lum_size - base_size) / sizeof(lum->lmm_objects[0]);
115 * Byte-swap the fields of struct lov_user_md.
117 * XXX Rather than duplicating swabbing code here, we should eventually
118 * refactor the needed functions in lustre/ptlrpc/pack_generic.c
119 * into a library that can be shared between kernel and user code.
/*
 * Swab a lov_user_md blob in place.  Nothing is touched unless lmm_magic
 * equals the byte-swapped form of LOV_MAGIC_V1, LOV_MAGIC_V3 or
 * LOV_MAGIC_COMP_V1.  For a composite blob the header and every entry
 * descriptor are swabbed, then lum/lum_size are re-pointed at the
 * lov_user_md embedded at that entry's offset so its fields and OST
 * indices are swabbed too.
 * NOTE(review): the return-type line, the early-exit statement and the
 * non-composite ent_count assignment are not visible in this excerpt.
 */
122 llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
124 int i, j, ent_count, obj_count;
125 struct lov_comp_md_v1 *comp_v1 = NULL;
126 struct lov_comp_md_entry_v1 *ent;
127 struct lov_user_ost_data *lod;
/* Only blobs whose magic is in foreign byte order need any work. */
129 if (lum->lmm_magic != __swab32(LOV_MAGIC_V1) &&
130 lum->lmm_magic != __swab32(LOV_MAGIC_V3) &&
131 lum->lmm_magic != __swab32(LOV_MAGIC_COMP_V1))
134 if (lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
135 comp_v1 = (struct lov_comp_md_v1 *)lum;
/* Composite header first; lcm_entry_count is read after it is swabbed. */
137 if (comp_v1 != NULL) {
138 comp_v1->lcm_magic = __swab32(comp_v1->lcm_magic);
139 comp_v1->lcm_size = __swab32(comp_v1->lcm_size);
140 comp_v1->lcm_layout_gen = __swab32(comp_v1->lcm_layout_gen);
141 comp_v1->lcm_flags = __swab16(comp_v1->lcm_flags);
142 comp_v1->lcm_entry_count = __swab16(comp_v1->lcm_entry_count);
143 ent_count = comp_v1->lcm_entry_count;
148 for (i = 0; i < ent_count; i++) {
149 if (comp_v1 != NULL) {
150 ent = &comp_v1->lcm_entries[i];
151 ent->lcme_id = __swab32(ent->lcme_id);
152 ent->lcme_flags = __swab32(ent->lcme_flags);
153 ent->lcme_timestamp = __swab64(ent->lcme_timestamp);
154 ent->lcme_extent.e_start = __swab64(ent->lcme_extent.e_start);
155 ent->lcme_extent.e_end = __swab64(ent->lcme_extent.e_end);
156 ent->lcme_offset = __swab32(ent->lcme_offset);
157 ent->lcme_size = __swab32(ent->lcme_size);
/* From here on lum/lum_size describe this entry's embedded blob. */
159 lum = (struct lov_user_md *)((char *)comp_v1 +
161 lum_size = ent->lcme_size;
/* llapi_layout_objects_in_lum() accepts the magic in either byte order. */
163 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
165 lum->lmm_magic = __swab32(lum->lmm_magic);
166 lum->lmm_pattern = __swab32(lum->lmm_pattern);
167 lum->lmm_stripe_size = __swab32(lum->lmm_stripe_size);
168 lum->lmm_stripe_count = __swab16(lum->lmm_stripe_count);
169 lum->lmm_stripe_offset = __swab16(lum->lmm_stripe_offset);
/* Anything other than V1 is addressed through the lov_user_md_v3 view. */
171 if (lum->lmm_magic != LOV_MAGIC_V1) {
172 struct lov_user_md_v3 *v3;
173 v3 = (struct lov_user_md_v3 *)lum;
174 lod = v3->lmm_objects;
176 lod = lum->lmm_objects;
/* Only l_ost_idx of each object is swabbed. */
179 for (j = 0; j < obj_count; j++)
180 lod[j].l_ost_idx = __swab32(lod[j].l_ost_idx);
185 * (Re-)allocate llc_objects[] to \a num_stripes stripes.
187 * Copy over existing llc_objects[], if any, to the new llc_objects[].
189 * \param[in] layout existing layout to be modified
190 * \param[in] num_stripes number of stripes in new layout
192 * \retval 0 if the objects are re-allocated successfully
193 * \retval -1 on error with errno set
195 static int __llapi_comp_objects_realloc(struct llapi_layout_comp *comp,
196 unsigned int new_stripes)
198 struct lov_user_ost_data_v1 *new_objects;
201 if (new_stripes > LOV_MAX_STRIPE_COUNT) {
206 if (new_stripes == comp->llc_objects_count)
209 if (new_stripes != 0 && new_stripes <= comp->llc_objects_count)
212 new_objects = realloc(comp->llc_objects,
213 sizeof(*new_objects) * new_stripes);
214 if (new_objects == NULL && new_stripes != 0) {
219 for (i = comp->llc_objects_count; i < new_stripes; i++)
220 new_objects[i].l_ost_idx = LLAPI_LAYOUT_IDX_MAX;
222 comp->llc_objects = new_objects;
223 comp->llc_objects_count = new_stripes;
229 * Allocate storage for a llapi_layout_comp with \a num_stripes stripes.
231 * \param[in] num_stripes number of stripes in new layout
233 * \retval valid pointer if allocation succeeds
234 * \retval NULL if allocation fails
236 static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
238 struct llapi_layout_comp *comp;
240 if (num_stripes > LOV_MAX_STRIPE_COUNT) {
245 comp = calloc(1, sizeof(*comp));
251 comp->llc_objects = NULL;
252 comp->llc_objects_count = 0;
254 if (__llapi_comp_objects_realloc(comp, num_stripes) < 0) {
260 comp->llc_pattern = LLAPI_LAYOUT_DEFAULT;
261 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
262 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
263 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
264 comp->llc_pool_name[0] = '\0';
265 comp->llc_extent.e_start = 0;
266 comp->llc_extent.e_end = LUSTRE_EOF;
269 INIT_LIST_HEAD(&comp->llc_list);
275 * Free memory allocated for \a comp
277 * \param[in] comp previously allocated by __llapi_comp_alloc()
279 static void __llapi_comp_free(struct llapi_layout_comp *comp)
281 if (comp->llc_objects != NULL)
282 free(comp->llc_objects);
287 * Free memory allocated for \a layout.
289 * \param[in] layout previously allocated by llapi_layout_alloc()
291 void llapi_layout_free(struct llapi_layout *layout)
293 struct llapi_layout_comp *comp, *n;
298 list_for_each_entry_safe(comp, n, &layout->llot_comp_list, llc_list) {
299 list_del_init(&comp->llc_list);
300 __llapi_comp_free(comp);
306 * Allocate and initialize a llapi_layout structure.
308 * \retval valid llapi_layout pointer on success
309 * \retval NULL if memory allocation fails
311 static struct llapi_layout *__llapi_layout_alloc(void)
313 struct llapi_layout *layout;
315 layout = calloc(1, sizeof(*layout));
316 if (layout == NULL) {
322 layout->llot_magic = LLAPI_LAYOUT_MAGIC;
323 layout->llot_gen = 0;
324 layout->llot_flags = 0;
325 layout->llot_is_composite = false;
326 layout->llot_mirror_count = 1;
327 layout->llot_cur_comp = NULL;
328 INIT_LIST_HEAD(&layout->llot_comp_list);
334 * Allocate and initialize a new plain layout.
336 * \retval valid llapi_layout pointer on success
337 * \retval NULL if memory allocation fails
339 struct llapi_layout *llapi_layout_alloc(void)
341 struct llapi_layout_comp *comp;
342 struct llapi_layout *layout;
344 layout = __llapi_layout_alloc();
348 comp = __llapi_comp_alloc(0);
354 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
355 layout->llot_cur_comp = comp;
361 * Check if the given \a lum_size is large enough to hold the required
364 * \param[in] lum the struct lov_user_md to check
365 * \param[in] lum_size the number of bytes in \a lum
367 * \retval true the \a lum_size is too small
368 * \retval false the \a lum_size is large enough
370 static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
374 if (lum_size < sizeof(lum->lmm_magic))
377 if (lum->lmm_magic == LOV_MAGIC_V1 ||
378 lum->lmm_magic == __swab32(LOV_MAGIC_V1))
379 magic = LOV_MAGIC_V1;
380 else if (lum->lmm_magic == LOV_MAGIC_V3 ||
381 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
382 magic = LOV_MAGIC_V3;
383 else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
384 lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
385 magic = LOV_MAGIC_COMP_V1;
389 if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
390 return lum_size < lov_user_md_size(0, magic);
392 return lum_size < sizeof(struct lov_comp_md_v1);
395 /* Verify if the objects count in lum is consistent with the
396 * stripe count in lum. It applies to regular file only. */
397 static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
399 struct lov_comp_md_v1 *comp_v1 = NULL;
400 int i, ent_count, obj_count;
402 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
403 comp_v1 = (struct lov_comp_md_v1 *)lum;
404 ent_count = comp_v1->lcm_entry_count;
405 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
406 lum->lmm_magic == LOV_MAGIC_V3) {
412 for (i = 0; i < ent_count; i++) {
414 lum = (struct lov_user_md *)((char *)comp_v1 +
415 comp_v1->lcm_entries[i].lcme_offset);
416 lum_size = comp_v1->lcm_entries[i].lcme_size;
418 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
421 if (!(comp_v1->lcm_entries[i].lcme_flags &
422 LCME_FL_INIT) && obj_count != 0)
424 } else if (obj_count != lum->lmm_stripe_count) {
432 * Convert the data from a lov_user_md to a newly allocated llapi_layout.
433 * The caller is responsible for freeing the returned pointer.
435 * \param[in] lov_xattr LOV user metadata xattr to copy data from
436 * \param[in] lov_xattr_size size the lov_xattr_size passed in
437 * \param[in] flags flags to control how layout is retrieved
439 * \retval valid llapi_layout pointer on success
440 * \retval NULL if memory allocation fails
/*
 * Build a llapi_layout from a raw LOV xattr blob.
 *
 * Steps visible below: reject a NULL or non-positive-size blob, reject a
 * truncated blob, swab the blob if it is in foreign byte order, optionally
 * validate object counts (LLAPI_LAYOUT_GET_CHECK), then allocate one
 * llapi_layout_comp per entry (one entry for plain V1/V3, lcm_entry_count
 * entries for COMP_V1) and append it to the layout.  The component appended
 * last is left as the current component.
 * NOTE(review): the errno values and goto targets of the error paths are
 * not visible in this excerpt.
 */
442 struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
443 ssize_t lov_xattr_size,
444 enum llapi_layout_get_flags flags)
446 struct lov_user_md *lum = lov_xattr;
447 struct lov_comp_md_v1 *comp_v1 = NULL;
448 struct lov_comp_md_entry_v1 *ent;
449 struct lov_user_md *v1;
450 struct llapi_layout *layout = NULL;
451 struct llapi_layout_comp *comp;
452 int i, ent_count = 0, obj_count;
454 if (lov_xattr == NULL || lov_xattr_size <= 0) {
459 /* Return an error if we got back a partial layout. */
460 if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
/* The private copy is only made on big-endian builds, and only on request. */
465 #if __BYTE_ORDER == __BIG_ENDIAN
466 if (flags & LLAPI_LAYOUT_GET_COPY) {
467 lum = malloc(lov_xattr_size);
472 memcpy(lum, lov_xattr, lov_xattr_size);
/* Swabs in place: without a copy, lum still aliases the caller's buffer. */
476 llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
/* Compatibility mapping of the old CHECK bit (0x0001) onto the current flag. */
478 #if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 16, 53, 0)
479 #define LLAPI_LXF_CHECK_OLD 0x0001
480 if (flags & LLAPI_LXF_CHECK_OLD)
481 flags = (flags & ~LLAPI_LXF_CHECK_OLD) | LLAPI_LAYOUT_GET_CHECK;
483 if ((flags & LLAPI_LAYOUT_GET_CHECK) &&
484 !llapi_layout_lum_valid(lum, lov_xattr_size)) {
489 layout = __llapi_layout_alloc();
490 if (layout == NULL) {
495 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
496 comp_v1 = (struct lov_comp_md_v1 *)lum;
497 ent_count = comp_v1->lcm_entry_count;
498 layout->llot_gen = comp_v1->lcm_layout_gen;
499 layout->llot_is_composite = true;
/* The on-disk lcm_mirror_count is stored minus one. */
500 layout->llot_mirror_count = comp_v1->lcm_mirror_count + 1;
/* NOTE(review): repeats the llot_gen assignment made three lines above. */
501 layout->llot_gen = comp_v1->lcm_layout_gen;
502 layout->llot_flags = comp_v1->lcm_flags;
503 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
504 lum->lmm_magic == LOV_MAGIC_V3) {
506 layout->llot_is_composite = false;
/* NOTE(review): lov_xattr_size <= 0 was already tested on entry. */
508 if (lov_xattr_size <= 0) {
517 if (ent_count == 0) {
522 v1 = (struct lov_user_md *)lum;
523 for (i = 0; i < ent_count; i++) {
524 if (comp_v1 != NULL) {
525 ent = &comp_v1->lcm_entries[i];
526 v1 = (struct lov_user_md *)((char *)comp_v1 +
/* lov_xattr_size is reused as the size of this entry's embedded blob. */
528 lov_xattr_size = ent->lcme_size;
533 obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
534 comp = __llapi_comp_alloc(obj_count);
539 comp->llc_extent.e_start = ent->lcme_extent.e_start;
540 comp->llc_extent.e_end = ent->lcme_extent.e_end;
541 comp->llc_id = ent->lcme_id;
542 comp->llc_flags = ent->lcme_flags;
/* The timestamp is only carried over for LCME_FL_NOSYNC components. */
543 if (comp->llc_flags & LCME_FL_NOSYNC)
544 comp->llc_timestamp = ent->lcme_timestamp;
546 comp->llc_extent.e_start = 0;
547 comp->llc_extent.e_end = LUSTRE_EOF;
/* Translate on-disk pattern, size, count and offset into LLAPI_LAYOUT_* values. */
552 if (v1->lmm_pattern == LOV_PATTERN_RAID0)
553 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
554 else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
555 LOV_PATTERN_OVERSTRIPING))
556 comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING;
557 else if (v1->lmm_pattern == LOV_PATTERN_MDT)
558 comp->llc_pattern = LLAPI_LAYOUT_MDT;
560 /* Lustre only supports RAID0, overstriping
563 comp->llc_pattern = v1->lmm_pattern;
565 if (v1->lmm_stripe_size == 0)
566 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
568 comp->llc_stripe_size = v1->lmm_stripe_size;
570 if (v1->lmm_stripe_count == (typeof(v1->lmm_stripe_count))-1)
571 comp->llc_stripe_count = LLAPI_LAYOUT_WIDE;
572 else if (v1->lmm_stripe_count == 0)
573 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
575 comp->llc_stripe_count = v1->lmm_stripe_count;
577 if (v1->lmm_stripe_offset ==
578 (typeof(v1->lmm_stripe_offset))-1)
579 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
581 comp->llc_stripe_offset = v1->lmm_stripe_offset;
583 if (v1->lmm_magic != LOV_USER_MAGIC_V1) {
584 const struct lov_user_md_v3 *lumv3;
585 lumv3 = (struct lov_user_md_v3 *)v1;
586 snprintf(comp->llc_pool_name,
587 sizeof(comp->llc_pool_name),
588 "%s", lumv3->lmm_pool_name);
589 memcpy(comp->llc_objects, lumv3->lmm_objects,
590 obj_count * sizeof(lumv3->lmm_objects[0]));
592 const struct lov_user_md_v1 *lumv1;
593 lumv1 = (struct lov_user_md_v1 *)v1;
594 memcpy(comp->llc_objects, lumv1->lmm_objects,
595 obj_count * sizeof(lumv1->lmm_objects[0]));
599 comp->llc_stripe_offset =
600 comp->llc_objects[0].l_ost_idx;
602 comp->llc_ondisk = true;
603 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
604 layout->llot_cur_comp = comp;
608 if (lum != lov_xattr)
612 llapi_layout_free(layout);
617 __u32 llapi_pattern_to_lov(uint64_t pattern)
622 case LLAPI_LAYOUT_DEFAULT:
623 lov_pattern = LOV_PATTERN_RAID0;
625 case LLAPI_LAYOUT_RAID0:
626 lov_pattern = LOV_PATTERN_RAID0;
628 case LLAPI_LAYOUT_MDT:
629 lov_pattern = LOV_PATTERN_MDT;
631 case LLAPI_LAYOUT_OVERSTRIPING:
632 lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
635 lov_pattern = EINVAL;
642 * Convert the data from a llapi_layout to a newly allocated lov_user_md.
643 * The caller is responsible for freeing the returned pointer.
645 * \param[in] layout the layout to copy from
647 * \retval valid lov_user_md pointer on success
648 * \retval NULL if memory allocation fails or the layout is invalid
/*
 * Serialize a llapi_layout into one heap buffer.
 *
 * For a composite layout the buffer starts with a lov_comp_md_v1 header plus
 * one entry descriptor per component; for every component (composite or not)
 * a lov_user_md blob is then appended by growing the buffer with realloc().
 * NOTE(review): the error paths, the declarations of lum_size, comp_cnt,
 * ent_idx, offset, magic and blob_size, and the offset/ent_idx updates are
 * not visible in this excerpt.
 */
650 static struct lov_user_md *
651 llapi_layout_to_lum(const struct llapi_layout *layout)
653 struct llapi_layout_comp *comp;
654 struct lov_comp_md_v1 *comp_v1 = NULL;
655 struct lov_comp_md_entry_v1 *ent;
656 struct lov_user_md *lum = NULL;
661 if (layout == NULL ||
662 list_empty((struct list_head *)&layout->llot_comp_list)) {
667 /* Allocate header of lov_comp_md_v1 if necessary */
668 if (layout->llot_is_composite) {
671 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
674 lum_size = sizeof(*comp_v1) + comp_cnt * sizeof(*ent);
675 lum = calloc(lum_size, 1);
680 comp_v1 = (struct lov_comp_md_v1 *)lum;
681 comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
682 comp_v1->lcm_size = lum_size;
/* The generation is written as 0, not copied from layout->llot_gen. */
683 comp_v1->lcm_layout_gen = 0;
684 comp_v1->lcm_flags = layout->llot_flags;
685 comp_v1->lcm_entry_count = comp_cnt;
/* Stored minus one; llapi_layout_get_by_xattr() adds the one back. */
686 comp_v1->lcm_mirror_count = layout->llot_mirror_count - 1;
690 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
691 struct lov_user_md *blob;
694 int i, obj_count = 0;
695 struct lov_user_ost_data *lmm_objects;
696 uint64_t pattern = comp->llc_pattern;
/*
 * Pick the blob flavour: SPECIFIC when explicit OST indices were requested,
 * V3 when only a pool name is set, V1 otherwise.  obj_count stays 0 unless
 * the SPECIFIC flavour is chosen.
 */
698 if ((pattern & LLAPI_LAYOUT_SPECIFIC) != 0) {
699 if (comp->llc_objects_count <
700 comp->llc_stripe_count) {
704 magic = LOV_USER_MAGIC_SPECIFIC;
705 obj_count = comp->llc_stripe_count;
706 pattern &= ~LLAPI_LAYOUT_SPECIFIC;
707 } else if (strlen(comp->llc_pool_name) != 0) {
708 magic = LOV_USER_MAGIC_V3;
710 magic = LOV_USER_MAGIC_V1;
712 /* All stripes must be specified when the pattern contains
713 * LLAPI_LAYOUT_SPECIFIC */
714 for (i = 0; i < obj_count; i++) {
715 if (comp->llc_objects[i].l_ost_idx ==
716 LLAPI_LAYOUT_IDX_MAX) {
/* Grow the buffer; comp_v1 is re-derived below because the buffer may move. */
722 blob_size = lov_user_md_size(obj_count, magic);
723 blob = realloc(lum, lum_size + blob_size);
729 comp_v1 = (struct lov_comp_md_v1 *)lum;
730 blob = (struct lov_user_md *)((char *)lum + lum_size);
731 lum_size += blob_size;
734 blob->lmm_magic = magic;
/* llapi_pattern_to_lov() reports an unknown pattern by returning EINVAL. */
735 blob->lmm_pattern = llapi_pattern_to_lov(pattern);
736 if (blob->lmm_pattern == EINVAL) {
/* Translate LLAPI_LAYOUT_DEFAULT/WIDE back to the 0 / -1 wire values. */
741 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
742 blob->lmm_stripe_size = 0;
744 blob->lmm_stripe_size = comp->llc_stripe_size;
746 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
747 blob->lmm_stripe_count = 0;
748 else if (comp->llc_stripe_count == LLAPI_LAYOUT_WIDE)
749 blob->lmm_stripe_count = LOV_ALL_STRIPES;
751 blob->lmm_stripe_count = comp->llc_stripe_count;
753 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
754 blob->lmm_stripe_offset = -1;
756 blob->lmm_stripe_offset = comp->llc_stripe_offset;
758 if (magic == LOV_USER_MAGIC_V3 ||
759 magic == LOV_USER_MAGIC_SPECIFIC) {
760 struct lov_user_md_v3 *lumv3 =
761 (struct lov_user_md_v3 *)blob;
/* The realloc()ed blob is not zeroed, so the pool name is always written. */
763 if (comp->llc_pool_name[0] != '\0') {
764 strncpy(lumv3->lmm_pool_name,
766 sizeof(lumv3->lmm_pool_name));
768 memset(lumv3->lmm_pool_name, 0,
769 sizeof(lumv3->lmm_pool_name));
771 lmm_objects = lumv3->lmm_objects;
773 lmm_objects = blob->lmm_objects;
776 for (i = 0; i < obj_count; i++)
777 lmm_objects[i].l_ost_idx =
778 comp->llc_objects[i].l_ost_idx;
/* Fill this component's entry descriptor and account for the blob size. */
780 if (layout->llot_is_composite) {
781 ent = &comp_v1->lcm_entries[ent_idx];
782 ent->lcme_id = comp->llc_id;
783 ent->lcme_flags = comp->llc_flags;
784 if (ent->lcme_flags & LCME_FL_NOSYNC)
785 ent->lcme_timestamp = comp->llc_timestamp;
786 ent->lcme_extent.e_start = comp->llc_extent.e_start;
787 ent->lcme_extent.e_end = comp->llc_extent.e_end;
788 ent->lcme_size = blob_size;
789 ent->lcme_offset = offset;
791 comp_v1->lcm_size += blob_size;
/**
 * Get the parent directory of a path.
 *
 * The result is \a path, truncated to fit \a buf, cut at its last '/'.
 * A path without any '/' yields ".".  A path whose only '/' is the leading
 * one (e.g. "/foo") yields an empty string.  \a buf is always
 * NUL-terminated when \a size is non-zero; nothing is written when
 * \a size is 0.
 *
 * \param[in] path	path to get parent of
 * \param[out] buf	buffer in which to store parent path
 * \param[in] size	size in bytes of buffer \a buf
 */
static void get_parent_dir(const char *path, char *buf, size_t size)
{
	char *last_slash;

	/* Without this guard, size - 1 below would wrap around. */
	if (size == 0)
		return;

	/*
	 * strncpy() does not terminate the destination when the source is
	 * truncated, so terminate explicitly before strrchr() scans buf.
	 */
	strncpy(buf, path, size - 1);
	buf[size - 1] = '\0';

	last_slash = strrchr(buf, '/');
	if (last_slash != NULL) {
		*last_slash = '\0';
	} else if (size >= 2) {
		buf[0] = '.';
		buf[1] = '\0';
	}
}
827 * Substitute unspecified attribute values in \a layout with values
828 * from fs global settings. (lov.stripesize, lov.stripecount,
831 * \param[in] layout layout to inherit values from
832 * \param[in] path file path of the filesystem
834 static void inherit_sys_attributes(struct llapi_layout *layout,
837 struct llapi_layout_comp *comp;
838 unsigned int ssize, scount, soffset;
841 rc = sattr_cache_get_defaults(NULL, path, &scount, &ssize, &soffset);
845 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
846 if (comp->llc_pattern == LLAPI_LAYOUT_DEFAULT)
847 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
848 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
849 comp->llc_stripe_size = ssize;
850 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
851 comp->llc_stripe_count = scount;
852 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
853 comp->llc_stripe_offset = soffset;
858 * Get the current component of \a layout.
860 * \param[in] layout layout to get current component
862 * \retval valid llapi_layout_comp pointer on success
863 * \retval NULL on error
865 static struct llapi_layout_comp *
866 __llapi_layout_cur_comp(const struct llapi_layout *layout)
868 struct llapi_layout_comp *comp;
870 if (layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
874 if (layout->llot_cur_comp == NULL) {
878 /* Verify data consistency */
879 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
880 if (comp == layout->llot_cur_comp)
887 * Test if any attributes of \a layout are specified.
889 * \param[in] layout the layout to check
891 * \retval true any attributes are specified
892 * \retval false all attributes are unspecified
894 static bool is_any_specified(const struct llapi_layout *layout)
896 struct llapi_layout_comp *comp;
898 comp = __llapi_layout_cur_comp(layout);
902 if (layout->llot_is_composite || layout->llot_mirror_count != 1)
905 return comp->llc_pattern != LLAPI_LAYOUT_DEFAULT ||
906 comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT ||
907 comp->llc_stripe_count != LLAPI_LAYOUT_DEFAULT ||
908 comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT ||
909 strlen(comp->llc_pool_name);
913 * Get the striping layout for the file referenced by file descriptor \a fd.
915 * If the filesystem does not support the "lustre." xattr namespace, the
916 * file must be on a non-Lustre filesystem, so set errno to ENOTTY per
917 * convention. If the file has no "lustre.lov" data, the file will
918 * inherit default values, so return a default layout.
920 * If the kernel gives us back less than the expected amount of data,
921 * we fail with errno set to EINTR.
923 * \param[in] fd open file descriptor
924 * \param[in] flags open file descriptor
926 * \retval valid llapi_layout pointer on success
927 * \retval NULL if an error occurs
929 struct llapi_layout *llapi_layout_get_by_fd(int fd,
930 enum llapi_layout_get_flags flags)
933 struct lov_user_md *lum;
934 struct llapi_layout *layout = NULL;
938 lum_len = XATTR_SIZE_MAX;
939 lum = malloc(lum_len);
943 bytes_read = fgetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_len);
944 if (bytes_read < 0) {
945 if (errno == EOPNOTSUPP)
947 else if (errno == ENODATA)
948 layout = llapi_layout_alloc();
952 /* Directories may have a positive non-zero lum->lmm_stripe_count
953 * yet have an empty lum->lmm_objects array. For non-directories the
954 * amount of data returned from the kernel must be consistent
955 * with the stripe count. */
956 if (fstat(fd, &st) < 0)
959 layout = llapi_layout_get_by_xattr(lum, bytes_read,
960 S_ISDIR(st.st_mode) ? 0 : LLAPI_LAYOUT_GET_CHECK);
967 * Get the expected striping layout for a file at \a path.
969 * Substitute expected inherited attribute values for unspecified
970 * attributes. Unspecified attributes may belong to directories and
971 * never-written-to files, and indicate that default values will be
972 * assigned when files are created or first written to. A default value
973 * is inherited from the parent directory if the attribute is specified
974 * there, otherwise it is inherited from the filesystem root.
975 * Unspecified attributes normally have the value LLAPI_LAYOUT_DEFAULT.
977 * The complete \a path need not refer to an existing file or directory,
978 * but some leading portion of it must reside within a lustre filesystem.
979 * A use case for this interface would be to obtain the literal striping
980 * values that would be assigned to a new file in a given directory.
982 * \param[in] path path for which to get the expected layout
984 * \retval valid llapi_layout pointer on success
985 * \retval NULL if an error occurs
/*
 * Resolution order visible below:
 *  1. the layout of \a path itself (a default layout is substituted when it
 *     cannot be read for ENODATA/ENOENT), used if anything is specified;
 *  2. if \a path is not a directory or does not exist, the layout of its
 *     parent directory, used if anything is specified;
 *  3. the layout of the filesystem root found by llapi_search_mounts().
 * Whichever layout is returned first has its remaining unspecified
 * attributes filled in by inherit_sys_attributes().
 * NOTE(review): the fd handling after llapi_layout_get_by_fd() and the
 * early-return statements are not visible in this excerpt.
 */
987 static struct llapi_layout *llapi_layout_expected(const char *path)
989 struct llapi_layout *path_layout = NULL;
990 char donor_path[PATH_MAX];
/* A missing path is not fatal: the parent/root fallbacks below still apply. */
995 fd = open(path, O_RDONLY);
996 if (fd < 0 && errno != ENOENT)
1002 path_layout = llapi_layout_get_by_fd(fd, 0);
1008 if (path_layout == NULL) {
1009 if (errno != ENODATA && errno != ENOENT)
1012 path_layout = llapi_layout_alloc();
1013 if (path_layout == NULL)
1017 if (is_any_specified(path_layout)) {
1018 inherit_sys_attributes(path_layout, path);
/* Nothing specified on the path itself: drop it and look further up. */
1022 llapi_layout_free(path_layout);
1024 rc = stat(path, &st);
1025 if (rc < 0 && errno != ENOENT)
1028 /* If path is not a directory or doesn't exist, inherit layout
1029 * from parent directory. */
1030 if ((rc == 0 && !S_ISDIR(st.st_mode)) ||
1031 (rc < 0 && errno == ENOENT)) {
1032 get_parent_dir(path, donor_path, sizeof(donor_path));
1033 path_layout = llapi_layout_get_by_path(donor_path, 0);
1034 if (path_layout != NULL) {
1035 if (is_any_specified(path_layout)) {
1036 inherit_sys_attributes(path_layout, donor_path);
1039 llapi_layout_free(path_layout);
1043 /* Inherit layout from the filesystem root. */
1044 rc = llapi_search_mounts(path, 0, donor_path, NULL);
1047 path_layout = llapi_layout_get_by_path(donor_path, 0);
1048 if (path_layout == NULL)
1051 inherit_sys_attributes(path_layout, donor_path);
1056 * Get the striping layout for the file at \a path.
1058 * If \a flags contains LLAPI_LAYOUT_GET_EXPECTED, substitute
1059 * expected inherited attribute values for unspecified attributes. See
1060 * llapi_layout_expected().
1062 * \param[in] path path for which to get the layout
1063 * \param[in] flags flags to control how layout is retrieved
1065 * \retval valid llapi_layout pointer on success
1066 * \retval NULL if an error occurs
1068 struct llapi_layout *llapi_layout_get_by_path(const char *path,
1069 enum llapi_layout_get_flags flags)
1071 struct llapi_layout *layout = NULL;
1075 if (flags & LLAPI_LAYOUT_GET_EXPECTED)
1076 return llapi_layout_expected(path);
1078 fd = open(path, O_RDONLY);
1082 layout = llapi_layout_get_by_fd(fd, flags);
1091 * Get the layout for the file with FID \a fidstr in filesystem \a lustre_dir.
1093 * \param[in] lustre_dir path within Lustre filesystem containing \a fid
1094 * \param[in] fid Lustre identifier of file to get layout for
1096 * \retval valid llapi_layout pointer on success
1097 * \retval NULL if an error occurs
1099 struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir,
1100 const struct lu_fid *fid,
1101 enum llapi_layout_get_flags flags)
1105 int saved_msg_level = llapi_msg_get_level();
1106 struct llapi_layout *layout = NULL;
1108 /* Prevent llapi internal routines from writing to console
1109 * while executing this function, then restore previous message
1111 llapi_msg_set_level(LLAPI_MSG_OFF);
1112 fd = llapi_open_by_fid(lustre_dir, fid, O_RDONLY);
1113 llapi_msg_set_level(saved_msg_level);
1118 layout = llapi_layout_get_by_fd(fd, flags);
1127 * Get the stripe count of \a layout.
1129 * \param[in] layout layout to get stripe count from
1130 * \param[out] count integer to store stripe count in
1132 * \retval 0 on success
1133 * \retval -1 if arguments are invalid
1135 int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
1138 struct llapi_layout_comp *comp;
1140 comp = __llapi_layout_cur_comp(layout);
1144 if (count == NULL) {
1149 *count = comp->llc_stripe_count;
1155 * The llapi_layout API functions have these extra validity checks since
1156 * they use intuitively named macros to denote special behavior, whereas
1157 * the old API uses 0 and -1.
1160 bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
1162 return stripe_count == LLAPI_LAYOUT_DEFAULT ||
1163 stripe_count == LLAPI_LAYOUT_WIDE ||
1164 (stripe_count != 0 && stripe_count != -1 &&
1165 llapi_stripe_count_is_valid(stripe_count));
/*
 * An extension size is valid if it is non-zero, aligned, and not too big
 * according to the llapi_stripe_size_* helpers.  Unlike a stripe size,
 * LLAPI_LAYOUT_DEFAULT gets no special treatment here.
 */
static bool llapi_layout_extension_size_is_valid(uint64_t ext_size)
{
	if (ext_size == 0)
		return false;

	if (!llapi_stripe_size_is_aligned(ext_size))
		return false;

	return !llapi_stripe_size_is_too_big(ext_size);
}
1175 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
1177 return stripe_size == LLAPI_LAYOUT_DEFAULT ||
1178 (stripe_size != 0 &&
1179 llapi_stripe_size_is_aligned(stripe_size) &&
1180 !llapi_stripe_size_is_too_big(stripe_size));
1183 static bool llapi_layout_stripe_index_is_valid(int64_t stripe_index)
1185 return stripe_index == LLAPI_LAYOUT_DEFAULT ||
1186 (stripe_index >= 0 &&
1187 llapi_stripe_index_is_valid(stripe_index));
1191 * Set the stripe count of \a layout.
1193 * \param[in] layout layout to set stripe count in
1194 * \param[in] count value to be set
1196 * \retval 0 on success
1197 * \retval -1 if arguments are invalid
1199 int llapi_layout_stripe_count_set(struct llapi_layout *layout,
1202 struct llapi_layout_comp *comp;
1204 comp = __llapi_layout_cur_comp(layout);
1208 if (!llapi_layout_stripe_count_is_valid(count)) {
1213 comp->llc_stripe_count = count;
1219 * Get the stripe/extension size of \a layout.
1221 * \param[in] layout layout to get stripe size from
1222 * \param[out] size integer to store stripe size in
1223 * \param[in] extension flag if extenion size is requested
1225 * \retval 0 on success
1226 * \retval -1 if arguments are invalid
1228 static int layout_stripe_size_get(const struct llapi_layout *layout,
1229 uint64_t *size, bool extension)
1231 struct llapi_layout_comp *comp;
1234 comp = __llapi_layout_cur_comp(layout);
1243 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1244 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1249 *size = comp->llc_stripe_size;
1250 if (comp->llc_flags & LCME_FL_EXTENSION)
1251 *size *= SEL_UNIT_SIZE;
/*
 * Get the stripe size of the current component of \a layout; fails for a
 * component that carries LCME_FL_EXTENSION.
 */
int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
				 uint64_t *size)
{
	const bool want_extension = false;

	return layout_stripe_size_get(layout, size, want_extension);
}
/*
 * Get the extension size of the current component of \a layout; fails for
 * a component that does not carry LCME_FL_EXTENSION.
 */
int llapi_layout_extension_size_get(const struct llapi_layout *layout,
				    uint64_t *size)
{
	const bool want_extension = true;

	return layout_stripe_size_get(layout, size, want_extension);
}
1269 * Set the stripe/extension size of \a layout.
1271 * \param[in] layout layout to set stripe size in
1272 * \param[in] size value to be set
1273 * \param[in] extension flag if extenion size is passed
1275 * \retval 0 on success
1276 * \retval -1 if arguments are invalid
1278 static int layout_stripe_size_set(struct llapi_layout *layout,
1279 uint64_t size, bool extension)
1281 struct llapi_layout_comp *comp;
1284 comp = __llapi_layout_cur_comp(layout);
1288 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1289 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1295 size /= SEL_UNIT_SIZE;
1297 if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) ||
1298 (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) {
1303 comp->llc_stripe_size = size;
/*
 * Set the stripe size of the current component of \a layout; fails for a
 * component that carries LCME_FL_EXTENSION.
 */
int llapi_layout_stripe_size_set(struct llapi_layout *layout,
				 uint64_t size)
{
	const bool is_extension = false;

	return layout_stripe_size_set(layout, size, is_extension);
}
/*
 * Set the extension size of the current component of \a layout; fails for
 * a component that does not carry LCME_FL_EXTENSION.
 */
int llapi_layout_extension_size_set(struct llapi_layout *layout,
				    uint64_t size)
{
	const bool is_extension = true;

	return layout_stripe_size_set(layout, size, is_extension);
}
1320 * Get the RAID pattern of \a layout.
1322 * \param[in] layout layout to get pattern from
1323 * \param[out] pattern integer to store pattern in
1325 * \retval 0 on success
1326 * \retval -1 if arguments are invalid
1328 int llapi_layout_pattern_get(const struct llapi_layout *layout,
1331 struct llapi_layout_comp *comp;
1333 comp = __llapi_layout_cur_comp(layout);
1337 if (pattern == NULL) {
1342 *pattern = comp->llc_pattern;
/* Validates \a pattern and stores it in the current component of \a layout. */
1348 * Set the pattern of \a layout.
1350 * \param[in] layout layout to set pattern in
1351 * \param[in] pattern value to be set
1353 * \retval 0 on success
1354 * \retval -1 if arguments are invalid or RAID pattern
1357 int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
1359 struct llapi_layout_comp *comp;
1361 comp = __llapi_layout_cur_comp(layout);
/* Only the default, RAID0, MDT and overstriping patterns pass this test. */
1365 if (pattern != LLAPI_LAYOUT_DEFAULT &&
1366 pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
1367 && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
/* Replace the pattern but keep an LLAPI_LAYOUT_SPECIFIC bit that is
 * already set on the component. */
1372 comp->llc_pattern = pattern |
1373 (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC);
1378 static inline int stripe_number_roundup(int stripe_number)
1380 unsigned int round_up = (stripe_number + 8) & ~7;
1381 return round_up > LOV_MAX_STRIPE_COUNT ?
1382 LOV_MAX_STRIPE_COUNT : round_up;
/* Records \a ost_index for stripe \a stripe_number in the current component,
 * growing the component's objects array as needed. */
1386 * Set the OST index of stripe number \a stripe_number to \a ost_index.
1388 * If only the starting stripe's OST index is specified, then this can use
1389 * the normal LOV_MAGIC_{V1,V3} layout type. If multiple OST indices are
1390 * given, then allocate an array to hold the list of indices and ensure that
1391 * the LOV_USER_MAGIC_SPECIFIC layout is used when creating the file.
1393 * \param[in] layout layout to set OST index in
1394 * \param[in] stripe_number stripe number to set index for
1395 * \param[in] ost_index the index to set
1397 * \retval 0 on success
1398 * \retval -1 if arguments are invalid or an unsupported stripe number
1399 * was specified, error returned in errno
1401 int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
1404 struct llapi_layout_comp *comp;
1406 comp = __llapi_layout_cur_comp(layout);
1410 if (!llapi_layout_stripe_index_is_valid(ost_index)) {
/* Stripe 0 reset to the default index: clear the SPECIFIC bit and shrink
 * the objects array to zero entries. */
1415 if (stripe_number == 0 && ost_index == LLAPI_LAYOUT_DEFAULT) {
1416 comp->llc_stripe_offset = ost_index;
1417 comp->llc_pattern &= ~LLAPI_LAYOUT_SPECIFIC;
1418 __llapi_comp_objects_realloc(comp, 0);
1419 } else if (stripe_number >= 0 &&
1420 stripe_number < LOV_MAX_STRIPE_COUNT) {
1421 if (ost_index >= LLAPI_LAYOUT_IDX_MAX) {
1426 /* Preallocate a few more stripes to avoid realloc() overhead.*/
1427 if (__llapi_comp_objects_realloc(comp,
1428 stripe_number_roundup(stripe_number)) < 0)
1431 comp->llc_objects[stripe_number].l_ost_idx = ost_index;
/* Stripe 0 also defines the component's starting OST offset. */
1433 if (stripe_number == 0)
1434 comp->llc_stripe_offset = ost_index;
1436 comp->llc_pattern |= LLAPI_LAYOUT_SPECIFIC;
/* Make the stripe count large enough to cover stripe_number. */
1438 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT ||
1439 comp->llc_stripe_count <= stripe_number)
1440 comp->llc_stripe_count = stripe_number + 1;
1450 * Get the OST index associated with stripe \a stripe_number.
1452 * Stripes are indexed starting from zero.
1454 * \param[in] layout layout to get index from
1455 * \param[in] stripe_number stripe number to get index for
1456 * \param[out] index integer to store index in
1458 * \retval 0 on success
1459 * \retval -1 if arguments are invalid
1461 int llapi_layout_ost_index_get(const struct llapi_layout *layout,
1462 uint64_t stripe_number, uint64_t *index)
1464 struct llapi_layout_comp *comp;
1466 comp = __llapi_layout_cur_comp(layout);
1470 if (index == NULL) {
1475 if (stripe_number >= comp->llc_stripe_count ||
1476 stripe_number >= comp->llc_objects_count) {
1481 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
1482 *index = LLAPI_LAYOUT_DEFAULT;
1484 *index = comp->llc_objects[stripe_number].l_ost_idx;
1491 * Get the pool name of layout \a layout.
1493 * \param[in] layout layout to get pool name from
1494 * \param[out] dest buffer to store pool name in
1495 * \param[in] n size in bytes of buffer \a dest
1497 * \retval 0 on success
1498 * \retval -1 if arguments are invalid
1500 int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
1503 struct llapi_layout_comp *comp;
1505 comp = __llapi_layout_cur_comp(layout);
1514 strncpy(dest, comp->llc_pool_name, n);
/* Validates \a pool_name and copies it into the current component. */
1520 * Set the name of the pool of layout \a layout.
1522 * \param[in] layout layout to set pool name in
1523 * \param[in] pool_name pool name to set
1525 * \retval 0 on success
1526 * \retval -1 if arguments are invalid or pool name is too long
1528 int llapi_layout_pool_name_set(struct llapi_layout *layout,
1531 struct llapi_layout_comp *comp;
1533 comp = __llapi_layout_cur_comp(layout);
/* pool_name is passed by address, so the validator may adjust it
 * (NOTE(review): presumably to strip a filesystem-name prefix - confirm). */
1537 if (!llapi_pool_name_is_valid(&pool_name, NULL)) {
/* Bounded by the destination array, which is LOV_MAXPOOLNAME + 1 bytes. */
1542 strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name));
/* Opens \a path and, when a layout is supplied, applies it to the new file
 * by writing the XATTR_LUSTRE_LOV extended attribute on the descriptor. */
1547 * Open and possibly create a file with a given \a layout.
1549 * If \a layout is NULL this function acts as a simple wrapper for
1550 * open(). By convention, ENOTTY is returned in errno if \a path
1551 * refers to a non-Lustre file.
1553 * \param[in] path name of the file to open
1554 * \param[in] open_flags open() flags
1555 * \param[in] mode permissions to create file, filtered by umask
1556 * \param[in] layout layout to create new file with
1558 * \retval non-negative file descriptor on successful open
1559 * \retval -1 if an error occurred
1561 int llapi_layout_file_open(const char *path, int open_flags, mode_t mode,
1562 const struct llapi_layout *layout)
1567 struct lov_user_md *lum;
1571 (layout != NULL && layout->llot_magic != LLAPI_LAYOUT_MAGIC)) {
/* Sanity-check the layout before touching the filesystem; the last
 * argument is true only when the layout has more than one mirror. */
1577 rc = llapi_layout_sanity((struct llapi_layout *)layout,
1579 !!(layout->llot_mirror_count > 1));
1581 llapi_layout_sanity_perror(rc);
1586 /* Object creation must be postponed until after layout attributes
1587 * have been applied. */
1588 if (layout != NULL && (open_flags & O_CREAT))
1589 open_flags |= O_LOV_DELAY_CREATE;
1591 fd = open(path, open_flags, mode);
/* Without a layout, or if open() failed, there is nothing more to apply. */
1593 if (layout == NULL || fd < 0)
1596 lum = llapi_layout_to_lum(layout);
/* The xattr payload size depends on the kind of lum that was built. */
1605 if (lum->lmm_magic == LOV_USER_MAGIC_COMP_V1)
1606 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
1607 else if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC)
1608 lum_size = lov_user_md_size(lum->lmm_stripe_count,
1611 lum_size = lov_user_md_size(0, lum->lmm_magic);
1613 rc = fsetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_size, 0);
/* Map EOPNOTSUPP to ENOTTY, the errno documented for non-Lustre files. */
1622 errno = errno == EOPNOTSUPP ? ENOTTY : errno;
/**
 * Create a new file with a given \a layout.
 *
 * O_CREAT and O_EXCL are always added to \a open_flags, so a successful
 * return means the file was created by this call with the given \a layout.
 *
 * \param[in] path		name of the file to open
 * \param[in] open_flags	open() flags
 * \param[in] mode		permissions to create new file with
 * \param[in] layout		layout to create new file with
 *
 * \retval	non-negative file descriptor on successful open
 * \retval	-1 if an error occurred
 */
int llapi_layout_file_create(const char *path, int open_flags, int mode,
			     const struct llapi_layout *layout)
{
	int create_flags = open_flags | O_CREAT | O_EXCL;

	return llapi_layout_file_open(path, create_flags, mode, layout);
}
1648 int llapi_layout_flags_get(struct llapi_layout *layout, uint32_t *flags)
1650 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1655 *flags = layout->llot_flags;
1660 * Set flags to the header of a component layout.
1662 int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
1664 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1669 layout->llot_flags = flags;
/* Maps the FLR state bits of \a flags (those selected by LCM_FL_FLR_MASK)
 * to a short constant string. */
1673 const char *llapi_layout_flags_string(uint32_t flags)
1675 switch (flags & LCM_FL_FLR_MASK) {
1678 case LCM_FL_WRITE_PENDING:
1680 case LCM_FL_SYNC_PENDING:
1687 const __u16 llapi_layout_string_flags(char *string)
1689 if (strncmp(string, "ro", strlen(string)) == 0)
1690 return LCM_FL_RDONLY;
1691 if (strncmp(string, "wp", strlen(string)) == 0)
1692 return LCM_FL_WRITE_PENDING;
1693 if (strncmp(string, "sp", strlen(string)) == 0)
1694 return LCM_FL_SYNC_PENDING;
1700 * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
1701 * @count: Mirror count value to be checked.
1703 * This function checks the validity of mirror count.
1705 * Return: true on success or false on failure.
1707 static bool llapi_layout_mirror_count_is_valid(uint16_t count)
1709 return count >= 0 && count <= LUSTRE_MIRROR_COUNT_MAX;
1713 * llapi_layout_mirror_count_get() - Get mirror count from the header of
1715 * @layout: Layout to get mirror count from.
1716 * @count: Returned mirror count value.
1718 * This function gets mirror count from the header of a layout.
1720 * Return: 0 on success or -1 on failure.
1722 int llapi_layout_mirror_count_get(struct llapi_layout *layout,
1725 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1730 *count = layout->llot_mirror_count;
1735 * llapi_layout_mirror_count_set() - Set mirror count to the header of a layout.
1736 * @layout: Layout to set mirror count in.
1737 * @count: Mirror count value to be set.
1739 * This function sets mirror count to the header of a layout.
1741 * Return: 0 on success or -1 on failure.
1743 int llapi_layout_mirror_count_set(struct llapi_layout *layout,
1746 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1751 if (!llapi_layout_mirror_count_is_valid(count)) {
1756 layout->llot_mirror_count = count;
1761 * Fetch the start and end offset of the current layout component.
1763 * \param[in] layout the layout component
1764 * \param[out] start extent start, inclusive
1765 * \param[out] end extent end, exclusive
1767 * \retval 0 on success
1768 * \retval <0 if error occurs
1770 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
1771 uint64_t *start, uint64_t *end)
1773 struct llapi_layout_comp *comp;
1775 comp = __llapi_layout_cur_comp(layout);
1779 if (start == NULL || end == NULL) {
1784 *start = comp->llc_extent.e_start;
1785 *end = comp->llc_extent.e_end;
/* Stores [start, end) in the current component and marks the layout as
 * composite. */
1791 * Set the layout extent of a layout.
1793 * \param[in] layout the layout to be set
1794 * \param[in] start extent start, inclusive
1795 * \param[in] end extent end, exclusive
1797 * \retval 0 on success
1798 * \retval <0 if error occurs
1800 int llapi_layout_comp_extent_set(struct llapi_layout *layout,
1801 uint64_t start, uint64_t end)
1803 struct llapi_layout_comp *comp;
1805 comp = __llapi_layout_cur_comp(layout);
1814 comp->llc_extent.e_start = start;
1815 comp->llc_extent.e_end = end;
/* Giving a component an explicit extent turns the layout into a
 * composite one. */
1816 layout->llot_is_composite = true;
1822 * Gets the attribute flags of the current component.
1824 * \param[in] layout the layout component
1825 * \param[out] flags stored the returned component flags
1827 * \retval 0 on success
1828 * \retval <0 if error occurs
1830 int llapi_layout_comp_flags_get(const struct llapi_layout *layout,
1833 struct llapi_layout_comp *comp;
1835 comp = __llapi_layout_cur_comp(layout);
1839 if (flags == NULL) {
1844 *flags = comp->llc_flags;
1850 * Sets the specified flags of the current component leaving other flags as-is.
1852 * \param[in] layout the layout component
1853 * \param[in] flags component flags to be set
1855 * \retval 0 on success
1856 * \retval <0 if error occurs
1858 int llapi_layout_comp_flags_set(struct llapi_layout *layout, uint32_t flags)
1860 struct llapi_layout_comp *comp;
1862 comp = __llapi_layout_cur_comp(layout);
1866 comp->llc_flags |= flags;
1872 * Clears the flags specified in the flags leaving other flags as-is.
1874 * \param[in] layout the layout component
1875 * \param[in] flags component flags to be cleared
1877 * \retval 0 on success
1878 * \retval <0 if error occurs
1880 int llapi_layout_comp_flags_clear(struct llapi_layout *layout,
1883 struct llapi_layout_comp *comp;
1885 comp = __llapi_layout_cur_comp(layout);
1889 comp->llc_flags &= ~flags;
1895 * Fetches the file-unique component ID of the current layout component.
1897 * \param[in] layout the layout component
1898 * \param[out] id stored the returned component ID
1900 * \retval 0 on success
1901 * \retval <0 if error occurs
1903 int llapi_layout_comp_id_get(const struct llapi_layout *layout, uint32_t *id)
1905 struct llapi_layout_comp *comp;
1907 comp = __llapi_layout_cur_comp(layout);
1921 * Return the mirror id of the current layout component.
1923 * \param[in] layout the layout component
1924 * \param[out] id stored the returned mirror ID
1926 * \retval 0 on success
1927 * \retval <0 if error occurs
1929 int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id)
1931 struct llapi_layout_comp *comp;
1933 comp = __llapi_layout_cur_comp(layout);
1942 *id = mirror_id_of(comp->llc_id);
1948 * Adds a component to \a layout, the new component will be added to
1949 * the tail of components list and it'll inherit attributes of existing
1950 * ones. The \a layout will change it's current component pointer to
1951 * the newly added component, and it'll be turned into a composite
1952 * layout if it was not before the adding.
1954 * \param[in] layout existing composite or plain layout
1956 * \retval 0 on success
1957 * \retval <0 if error occurs
1959 int llapi_layout_comp_add(struct llapi_layout *layout)
1961 struct llapi_layout_comp *last, *comp, *new;
1962 bool composite = layout->llot_is_composite;
1964 comp = __llapi_layout_cur_comp(layout);
1968 new = __llapi_comp_alloc(0);
1972 last = list_entry(layout->llot_comp_list.prev, typeof(*last),
1975 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1977 /* We must mark the layout composite for the sanity check, but it may
1978 * not stay that way if the check fails */
1979 layout->llot_is_composite = true;
1980 layout->llot_cur_comp = new;
1982 /* We need to set a temporary non-zero value for "end" when we call
1983 * comp_extent_set, so we use LUSTRE_EOF-1, which is > all allowed
1984 * for the end of the previous component. (If we're adding this
1985 * component, the end of the previous component cannot be EOF.) */
1986 if (llapi_layout_comp_extent_set(layout, last->llc_extent.e_end,
1988 llapi_layout_comp_del(layout);
1989 layout->llot_is_composite = composite;
1996 * Adds a first component of a mirror to \a layout.
1997 * The \a layout will change it's current component pointer to
1998 * the newly added component, and it'll be turned into a composite
1999 * layout if it was not before the adding.
2001 * \param[in] layout existing composite or plain layout
2003 * \retval 0 on success
2004 * \retval <0 if error occurs
2006 int llapi_layout_add_first_comp(struct llapi_layout *layout)
2008 struct llapi_layout_comp *comp, *new;
2010 comp = __llapi_layout_cur_comp(layout);
2014 new = __llapi_comp_alloc(0);
2018 new->llc_extent.e_start = 0;
2020 list_add_tail(&new->llc_list, &layout->llot_comp_list);
2021 layout->llot_cur_comp = new;
2022 layout->llot_is_composite = true;
2028 * Deletes current component from the composite layout. The component
2029 * to be deleted must be the tail of components list, and it can't be
2030 * the only component in the layout.
2032 * \param[in] layout composite layout
2034 * \retval 0 on success
2035 * \retval <0 if error occurs
2037 int llapi_layout_comp_del(struct llapi_layout *layout)
2039 struct llapi_layout_comp *comp;
2041 comp = __llapi_layout_cur_comp(layout);
2045 if (!layout->llot_is_composite) {
2050 /* It must be the tail of the list (for PFL, can be relaxed
2051 * once we get mirrored components) */
2052 if (comp->llc_list.next != &layout->llot_comp_list) {
2056 layout->llot_cur_comp =
2057 list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
2058 if (comp->llc_list.prev == &layout->llot_comp_list)
2059 layout->llot_cur_comp = NULL;
2061 list_del_init(&comp->llc_list);
2062 __llapi_comp_free(comp);
/* Searches the component list of \a layout for \a comp_id and, when found,
 * makes that component the current one. */
2068 * Move the current component pointer to the component with
2069 * specified component ID.
2071 * \param[in] layout composite layout
2072 * \param[in] id component ID
2074 * \retval =0 : moved successfully
2075 * \retval <0 if error occurs
2077 int llapi_layout_comp_use_id(struct llapi_layout *layout, uint32_t comp_id)
2079 struct llapi_layout_comp *comp;
2081 comp = __llapi_layout_cur_comp(layout);
2083 return -1; /* use previously set errno */
2085 if (!layout->llot_is_composite) {
/* LCME_ID_INVAL is never looked up. */
2090 if (comp_id == LCME_ID_INVAL) {
/* Linear scan; the first component with a matching ID becomes current. */
2095 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
2096 if (comp->llc_id == comp_id) {
2097 layout->llot_cur_comp = comp;
/* Repositions the current-component pointer of \a layout to the first, last,
 * next or previous entry of its component list. */
2106 * Move the current component pointer to a specified position.
2108 * \param[in] layout composite layout
2109 * \param[in] pos the position to be moved, it can be:
2110 * LLAPI_LAYOUT_COMP_USE_FIRST: use first component
2111 * LLAPI_LAYOUT_COMP_USE_LAST: use last component
2112 * LLAPI_LAYOUT_COMP_USE_NEXT: use component after current
2113 * LLAPI_LAYOUT_COMP_USE_PREV: use component before current
2115 * \retval =0 : moved successfully
2116 * \retval =1 : at last component with NEXT, at first component with PREV
2117 * \retval <0 if error occurs
2119 int llapi_layout_comp_use(struct llapi_layout *layout,
2120 enum llapi_layout_comp_use pos)
2122 struct llapi_layout_comp *comp, *head, *tail;
2124 comp = __llapi_layout_cur_comp(layout);
/* A non-composite layout is special-cased: FIRST and LAST are tested
 * separately from the other positions. */
2128 if (!layout->llot_is_composite) {
2129 if (pos == LLAPI_LAYOUT_COMP_USE_FIRST ||
2130 pos == LLAPI_LAYOUT_COMP_USE_LAST)
/* First and last entries of the component list. */
2136 head = list_entry(layout->llot_comp_list.next, typeof(*head), llc_list);
2137 tail = list_entry(layout->llot_comp_list.prev, typeof(*tail), llc_list);
2139 case LLAPI_LAYOUT_COMP_USE_FIRST:
2140 layout->llot_cur_comp = head;
2142 case LLAPI_LAYOUT_COMP_USE_NEXT:
2147 layout->llot_cur_comp = list_entry(comp->llc_list.next,
2148 typeof(*comp), llc_list);
2150 case LLAPI_LAYOUT_COMP_USE_LAST:
2151 layout->llot_cur_comp = tail;
2153 case LLAPI_LAYOUT_COMP_USE_PREV:
2158 layout->llot_cur_comp = list_entry(comp->llc_list.prev,
2159 typeof(*comp), llc_list);
/* Appends the components of \a layout to the file at \a path: the merged
 * result is sanity-checked locally, then the new components are sent with
 * the XATTR_LUSTRE_LOV".add" extended attribute. */
2170 * Add layout component(s) to an existing file.
2172 * \param[in] path The path name of the file
2173 * \param[in] layout The layout component(s) to be added
2175 int llapi_layout_file_comp_add(const char *path,
2176 const struct llapi_layout *layout)
2178 int rc, fd = -1, lum_size, tmp_errno = 0;
2179 struct llapi_layout *existing_layout = NULL;
2180 struct lov_user_md *lum = NULL;
2182 if (path == NULL || layout == NULL ||
2183 layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
2188 fd = open(path, O_RDWR);
2195 existing_layout = llapi_layout_get_by_fd(fd, 0);
2196 if (existing_layout == NULL) {
/* Build the would-be result (existing + new components) so that it can be
 * validated before the file is modified. */
2202 rc = llapi_layout_merge(&existing_layout, layout);
2209 rc = llapi_layout_sanity(existing_layout, path, false, false);
2212 llapi_layout_sanity_perror(rc);
/* Only the components being added are converted and sent. */
2217 lum = llapi_layout_to_lum(layout);
2224 if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) {
2229 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2231 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0);
2241 llapi_layout_free(existing_layout);
/* Deletes components of the file at \a path selected either by \a id or by
 * \a flags.  The deletion is first replayed on a copy of the file's layout
 * and sanity-checked, then requested with the XATTR_LUSTRE_LOV".del"
 * extended attribute. */
2247 * Delete component(s) by the specified component id or component flags
2248 * from an existing file.
2250 * \param[in] path path name of the file
2251 * \param[in] id unique component ID
2252 * \param[in] flags flags: LCME_FL_* or;
2253 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2255 int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags)
2257 int rc = 0, fd = -1, lum_size, tmp_errno = 0;
2258 struct llapi_layout *layout;
2259 struct llapi_layout_comp *comp, *next;
2260 struct llapi_layout *existing_layout = NULL;
2261 struct lov_user_md *lum = NULL;
2263 if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) {
2268 /* Can only specify ID or flags, not both, not none. */
2269 if ((id != LCME_ID_INVAL && flags != 0) ||
2270 (id == LCME_ID_INVAL && flags == 0)) {
/* Temporary one-component layout that carries the selection criteria. */
2275 layout = llapi_layout_alloc();
2279 llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF);
2280 comp = __llapi_layout_cur_comp(layout);
2288 comp->llc_flags = flags;
2290 lum = llapi_layout_to_lum(layout);
2296 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2298 fd = open(path, O_RDWR);
2305 existing_layout = llapi_layout_get_by_fd(fd, 0);
2306 if (existing_layout == NULL) {
/* Walk the existing components from the last one backwards. */
2314 while (rc == 0 && existing_layout->llot_cur_comp != NULL) {
2315 rc = llapi_layout_comp_use(existing_layout, comp ?
2316 LLAPI_LAYOUT_COMP_USE_PREV :
2317 LLAPI_LAYOUT_COMP_USE_LAST);
2322 comp = __llapi_layout_cur_comp(existing_layout);
/* Selection tests: ID mismatch, negated flags present, wanted flags
 * absent. */
2328 if (id != LCME_ID_INVAL && id != comp->llc_id)
2330 else if ((flags & LCME_FL_NEG) && (flags & comp->llc_flags))
2332 else if (flags && !(flags & comp->llc_flags))
2335 rc = llapi_layout_comp_del(existing_layout);
2336 /* the layout position is moved to previous one, adjust */
2344 rc = llapi_layout_sanity(existing_layout, path, false, false);
2347 llapi_layout_sanity_perror(rc);
2352 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0);
2363 llapi_layout_free(layout);
2364 llapi_layout_free(existing_layout);
2370 /* Internal utility function to apply flags for sanity checking */
2371 static void llapi_layout_comp_apply_flags(struct llapi_layout_comp *comp,
2374 if (flags & LCME_FL_NEG)
2375 comp->llc_flags &= ~flags;
2377 comp->llc_flags |= flags;
/* Argument bundle handed to llapi_layout_apply_flags_cb() through the
 * iterator: parallel arrays of component IDs and flags, plus their length. */
2380 struct llapi_layout_apply_flags_args {
2382 uint32_t *lfa_flags;
/* Iterator callback: applies to the current component every flags entry
 * whose ID matches that component's ID. */
2388 static int llapi_layout_apply_flags_cb(struct llapi_layout *layout,
2391 struct llapi_layout_apply_flags_args *args = arg;
2392 struct llapi_layout_comp *comp;
2395 comp = __llapi_layout_cur_comp(layout);
2398 return LLAPI_LAYOUT_ITER_STOP;
/* An ID may appear more than once in the array; each match is applied. */
2401 for (i = 0; i < args->lfa_count; i++) {
2402 if (comp->llc_id == args->lfa_ids[i])
2403 llapi_layout_comp_apply_flags(comp, args->lfa_flags[i]);
2406 return LLAPI_LAYOUT_ITER_CONT;
2409 /* Apply flags to the layout for sanity checking */
/* Runs llapi_layout_apply_flags_cb() over every component of \a layout with
 * the given ID/flags arrays. */
2410 static int llapi_layout_apply_flags(struct llapi_layout *layout, uint32_t *ids,
2411 uint32_t *flags, int count)
2413 struct llapi_layout_apply_flags_args args;
2416 if (!ids || !flags || count == 0) {
2422 args.lfa_flags = flags;
2423 args.lfa_count = count;
2426 rc = llapi_layout_comp_iterate(layout,
2427 llapi_layout_apply_flags_cb,
2429 if (errno == ENOENT)
/* Any result other than a complete walk is handled separately. */
2432 if (rc != LLAPI_LAYOUT_ITER_CONT)
/* Changes component flags of the file at \a path.  The change is first
 * applied to a copy of the file's layout and sanity-checked, then sent with
 * the XATTR_LUSTRE_LOV".set.flags" extended attribute. */
2438 * Change flags by component ID of components of an existing file.
2439 * The component to be modified is specified by the comp->lcme_id value,
2440 * which must be a unique component ID.
2442 * \param[in] path path name of the file
2443 * \param[in] ids An array of component IDs
2444 * \param[in] flags flags: LCME_FL_* or;
2445 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2446 * \param[in] count Number of elements in ids and flags array
2448 int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
2451 int rc = -1, fd = -1, i, tmp_errno = 0;
2453 struct llapi_layout *existing_layout = NULL;
2454 struct llapi_layout *layout = NULL;
2455 struct llapi_layout_comp *comp;
2456 struct lov_user_md *lum = NULL;
/* Validate every (id, flags) pair before the file is opened. */
2466 for (i = 0; i < count; i++) {
2467 if (!ids[i] || !flags[i]) {
2472 if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
2477 /* do not allow to set or clear INIT flag */
2478 if (flags[i] & LCME_FL_INIT) {
2484 fd = open(path, O_RDWR);
2491 existing_layout = llapi_layout_get_by_fd(fd, 0);
2492 if (existing_layout == NULL) {
2498 if (llapi_layout_apply_flags(existing_layout, ids, flags, count)) {
2504 rc = llapi_layout_sanity(existing_layout, path, false, false);
2507 llapi_layout_sanity_perror(rc);
/* Build a layout that carries only the (id, flags) pairs to send. */
2512 layout = __llapi_layout_alloc();
2513 if (layout == NULL) {
2519 layout->llot_is_composite = true;
2520 for (i = 0; i < count; i++) {
2521 comp = __llapi_comp_alloc(0);
2528 comp->llc_id = ids[i];
2529 comp->llc_flags = flags[i];
2531 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
2532 layout->llot_cur_comp = comp;
2535 lum = llapi_layout_to_lum(layout);
2542 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2544 /* flush cached pages from clients */
2545 rc = llapi_file_flush(fd);
2552 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
2565 llapi_layout_free(existing_layout);
2566 llapi_layout_free(layout);
2572 * Check if the file layout is composite.
2574 * \param[in] layout the file layout to check
2576 * \retval true composite
2577 * \retval false not composite
2579 bool llapi_layout_is_composite(struct llapi_layout *layout)
2581 return layout->llot_is_composite;
/* Visits the components of \a layout from first to last, moving the
 * current-component pointer as it goes and calling \a cb for each one. */
2585 * Iterate every components in the @layout and call callback function @cb.
2587 * \param[in] layout component layout list.
2588 * \param[in] cb callback for each component
2589 * \param[in] cbdata callback data
2591 * \retval < 0 error happens during the iteration
2592 * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error
2593 * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration
2595 int llapi_layout_comp_iterate(struct llapi_layout *layout,
2596 llapi_layout_iter_cb cb, void *cbdata)
2600 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2605 * make sure on success llapi_layout_comp_use() API returns 0 with
2611 rc = cb(layout, cbdata);
/* The callback decides whether the walk continues. */
2612 if (rc != LLAPI_LAYOUT_ITER_CONT)
2615 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2618 else if (rc == 1) /* reached the last comp */
2619 return LLAPI_LAYOUT_ITER_CONT;
2626 * llapi_layout_merge() - Merge a composite layout into another one.
2627 * @dst_layout: Destination composite layout.
2628 * @src_layout: Source composite layout.
2630 * This function copies all of the components from @src_layout and
2631 * appends them to @dst_layout.
2633 * Return: 0 on success or -1 on failure.
2635 int llapi_layout_merge(struct llapi_layout **dst_layout,
2636 const struct llapi_layout *src_layout)
2638 struct llapi_layout *new_layout = *dst_layout;
2639 struct llapi_layout_comp *new = NULL;
2640 struct llapi_layout_comp *comp = NULL;
2643 if (src_layout == NULL ||
2644 list_empty((struct list_head *)&src_layout->llot_comp_list))
2647 if (new_layout == NULL) {
2648 new_layout = __llapi_layout_alloc();
2649 if (new_layout == NULL) {
2655 list_for_each_entry(comp, &src_layout->llot_comp_list, llc_list) {
2656 new = __llapi_comp_alloc(0);
2662 new->llc_pattern = comp->llc_pattern;
2663 new->llc_stripe_size = comp->llc_stripe_size;
2664 new->llc_stripe_count = comp->llc_stripe_count;
2665 new->llc_stripe_offset = comp->llc_stripe_offset;
2667 if (comp->llc_pool_name[0] != '\0')
2668 strncpy(new->llc_pool_name, comp->llc_pool_name,
2669 sizeof(new->llc_pool_name));
2671 for (i = 0; i < comp->llc_objects_count; i++) {
2672 if (__llapi_comp_objects_realloc(new,
2673 stripe_number_roundup(i)) < 0) {
2675 __llapi_comp_free(new);
2678 new->llc_objects[i].l_ost_idx = \
2679 comp->llc_objects[i].l_ost_idx;
2682 new->llc_objects_count = comp->llc_objects_count;
2683 new->llc_extent.e_start = comp->llc_extent.e_start;
2684 new->llc_extent.e_end = comp->llc_extent.e_end;
2685 new->llc_id = comp->llc_id;
2686 new->llc_flags = comp->llc_flags;
2688 list_add_tail(&new->llc_list, &new_layout->llot_comp_list);
2689 new_layout->llot_cur_comp = new;
2691 new_layout->llot_is_composite = true;
2693 *dst_layout = new_layout;
2696 llapi_layout_free(new_layout);
/* Makes the last component flagged LCME_FL_INIT the current component of
 * \a layout, searching from the tail of the component list. */
2701 * Get the last initialized component
2703 * \param[in] layout component layout list.
2706 * \retval -EINVAL not found
2707 * \retval -EISDIR directory layout
2709 int llapi_layout_get_last_init_comp(struct llapi_layout *layout)
2711 struct llapi_layout_comp *comp = NULL, *head = NULL;
2713 if (!layout->llot_is_composite)
2716 head = list_entry(layout->llot_comp_list.next, typeof(*comp), llc_list);
/* A first component with ID 0 and without INIT is handled as a special
 * case (the -EISDIR "directory layout" result documented above). */
2719 if (head->llc_id == 0 && !(head->llc_flags & LCME_FL_INIT))
2723 /* traverse the components from the tail to find the last init one */
2724 comp = list_entry(layout->llot_comp_list.prev, typeof(*comp), llc_list);
2725 while (comp != head) {
2726 if (comp->llc_flags & LCME_FL_INIT)
2728 comp = list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
/* The current pointer is moved even when no INIT component was found. */
2731 layout->llot_cur_comp = comp;
2733 return comp->llc_flags & LCME_FL_INIT ? 0 : -EINVAL;
2737 * Interit stripe info from the file's component to the mirror
2739 * \param[in] layout file component layout list.
2740 * \param[in] layout mirro component layout list.
2742 * \retval 0 on success
2743 * \retval -EINVAL on error
2745 int llapi_layout_mirror_inherit(struct llapi_layout *f_layout,
2746 struct llapi_layout *m_layout)
2748 struct llapi_layout_comp *m_comp = NULL;
2749 struct llapi_layout_comp *f_comp = NULL;
2752 f_comp = __llapi_layout_cur_comp(f_layout);
2755 m_comp = __llapi_layout_cur_comp(m_layout);
2759 m_comp->llc_stripe_size = f_comp->llc_stripe_size;
2760 m_comp->llc_stripe_count = f_comp->llc_stripe_count;
/* Walks every component of \a layout and records the ID, mirror ID and
 * extent of each component flagged LCME_FL_STALE in \a comp.  The walk uses
 * llapi_layout_comp_use(), so it moves the layout's current-component
 * pointer.  NOTE(review): an entry is written to comp[idx] before idx is
 * compared with comp_size - confirm callers never pass comp_size == 0. */
2766 * Find all stale components.
2768 * \param[in] layout component layout list.
2769 * \param[out] comp array of stale component info.
2770 * \param[in] comp_size array size of @comp.
2771 * \param[in] mirror_ids array of mirror id that only components
2772 * belonging to these mirror will be collected.
2773 * \param[in] ids_nr number of mirror ids array.
2775 * \retval number of component info collected on sucess or
2776 * an error code on failure.
2778 int llapi_mirror_find_stale(struct llapi_layout *layout,
2779 struct llapi_resync_comp *comp, size_t comp_size,
2780 __u16 *mirror_ids, int ids_nr)
2785 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2793 uint64_t start, end;
2795 rc = llapi_layout_comp_flags_get(layout, &flags);
2799 if (!(flags & LCME_FL_STALE))
2802 rc = llapi_layout_mirror_id_get(layout, &mirror_id);
2806 /* the caller only wants stale components from specific
2811 for (j = 0; j < ids_nr; j++) {
2812 if (mirror_ids[j] == mirror_id)
2816 /* not in the specified mirror */
2819 } else if (flags & LCME_FL_NOSYNC) {
2820 /* if not specified mirrors, do not resync "nosync"
2825 rc = llapi_layout_comp_id_get(layout, &id);
2829 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2833 /* pack this component into @comp array */
2834 comp[idx].lrc_id = id;
2835 comp[idx].lrc_mirror_id = mirror_id;
2836 comp[idx].lrc_start = start;
2837 comp[idx].lrc_end = end;
2840 if (idx >= comp_size) {
2846 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2853 return rc < 0 ? rc : idx;
2856 /* locate @layout to a valid component covering file [file_start, file_end) */
/* Walks the components of \a layout from the first one, ignoring stale
 * components, and follows consecutive extents of one mirror starting at
 * file_start; the end of the range reached so far is stored in *endp. */
2857 uint32_t llapi_mirror_find(struct llapi_layout *layout,
2858 uint64_t file_start, uint64_t file_end,
2861 uint32_t mirror_id = 0;
2864 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2870 uint64_t start, end;
2871 uint32_t flags, id, rid;
2873 rc = llapi_layout_comp_flags_get(layout, &flags);
/* Stale components cannot serve as a data source. */
2877 if (flags & LCME_FL_STALE)
2880 rc = llapi_layout_mirror_id_get(layout, &rid);
2884 rc = llapi_layout_comp_id_get(layout, &id);
2888 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2892 if (file_start >= start && file_start < end) {
/* A follow-up component must belong to the same mirror and begin exactly
 * where the previous one ended. */
2895 else if (mirror_id != rid || *endp != start)
2898 file_start = *endp = end;
2899 if (end >= file_end)
2904 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2914 int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
2915 struct llapi_resync_comp *comp_array,
2916 int comp_size, uint64_t start, uint64_t end)
2918 size_t page_size = sysconf(_SC_PAGESIZE);
2919 const size_t buflen = 4 << 20; /* 4M */
2921 uint64_t pos = start;
2922 uint64_t data_off = pos, data_end = pos;
2928 rc = posix_memalign(&buf, page_size, buflen);
2933 uint64_t mirror_end;
2938 if (pos >= data_end) {
2942 if (pos >= mirror_end || !src) {
2943 rc = llapi_mirror_find(layout, pos, end,
2948 /* restrict mirror end by resync end */
2949 mirror_end = MIN(end, mirror_end);
2952 tmp_off = llapi_mirror_data_seek(fd, src, pos,
2955 /* switch to full copy */
2956 to_read = mirror_end - pos;
2960 data_end = data_off + data_size;
2962 data_off = MIN(data_off, mirror_end);
2963 data_end = MIN(data_end, mirror_end);
2965 /* align by page, if there is data block to copy */
2967 data_off &= ~(page_size - 1);
2970 if (pos < data_off) {
2971 for (i = 0; i < comp_size; i++) {
2974 uint32_t mid = comp_array[i].lrc_mirror_id;
2976 /* skip non-overlapped component */
2977 if (pos >= comp_array[i].lrc_end ||
2978 data_off <= comp_array[i].lrc_start)
2981 if (pos < comp_array[i].lrc_start)
2982 cur_pos = comp_array[i].lrc_start;
2986 if (data_off > comp_array[i].lrc_end)
2987 to_punch = comp_array[i].lrc_end -
2990 to_punch = data_off - cur_pos;
2992 if (comp_array[i].lrc_end == OBD_OBJECT_EOF) {
2993 /* the last component can be truncated
2996 rc = llapi_mirror_truncate(fd, mid,
2998 /* hole at the end of file, so just
2999 * truncate up to set size.
3001 if (!rc && data_off == data_end)
3002 rc = llapi_mirror_truncate(fd,
3005 rc = llapi_mirror_punch(fd,
3006 comp_array[i].lrc_mirror_id,
3009 /* if failed then read failed hole range */
3013 if (pos + to_punch == data_off)
3014 to_read = data_end - pos;
3022 if (pos == mirror_end)
3024 to_read = data_end - pos;
3029 assert(data_end <= mirror_end);
3031 to_read = MIN(buflen, to_read);
3032 to_read = ((to_read - 1) | (page_size - 1)) + 1;
3033 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
3034 if (bytes_read == 0) {
3038 if (bytes_read < 0) {
3043 /* round up to page align to make direct IO happy. */
3044 to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
3046 for (i = 0; i < comp_size; i++) {
3049 size_t to_write2 = to_write;
3051 /* skip non-overlapped component */
3052 if (pos >= comp_array[i].lrc_end ||
3053 pos + to_write <= comp_array[i].lrc_start)
3056 if (pos < comp_array[i].lrc_start)
3057 pos2 = comp_array[i].lrc_start;
3059 to_write2 -= pos2 - pos;
3061 if ((pos + to_write) > comp_array[i].lrc_end)
3062 to_write2 -= pos + to_write -
3063 comp_array[i].lrc_end;
3065 written = llapi_mirror_write(fd,
3066 comp_array[i].lrc_mirror_id,
3071 * this component is not written successfully,
3072 * mark it using its lrc_synced, it is supposed
3073 * to be false before getting here.
3075 * And before this function returns, all
3076 * elements of comp_array will reverse their
3077 * lrc_synced flag to reflect their true
3080 comp_array[i].lrc_synced = true;
3081 llapi_error(LLAPI_MSG_ERROR, written,
3082 "component %u not synced",
3083 comp_array[i].lrc_id);
3088 assert(written == to_write2);
3096 /* fatal error happens */
3097 for (i = 0; i < comp_size; i++)
3098 comp_array[i].lrc_synced = false;
3103 * no fatal error happens, each lrc_synced tells whether the component
3104 * has been resynced successfully (note: we reverse the value so that
3105 * it reflects its true meaning).
3107 for (i = 0; i < comp_size; i++) {
3108 comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
3109 if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
3110 rc = llapi_mirror_truncate(fd,
3111 comp_array[i].lrc_mirror_id, pos);
3113 comp_array[i].lrc_synced = false;
3118 * returns the first error code for partially successful resync if
/*
 * Result codes of the layout sanity check.  llapi_layout_sanity_cb() stores
 * one of these in lsa_rc when a check fails, and each code is the index of
 * its message in llapi_layout_strerror[].
 */
3124 enum llapi_layout_comp_sanity_error {
3126 LSE_INCOMPLETE_MIRROR,
3127 LSE_ADJACENT_EXTENSION,
3131 LSE_DOM_EXTENSION_FOLLOWING,
3134 LSE_NOT_ZERO_LENGTH_EXTENDABLE,
3135 LSE_END_NOT_GREATER,
3136 LSE_ZERO_LENGTH_NORMAL,
3137 LSE_NOT_ADJACENT_PREV,
/*
 * Messages for enum llapi_layout_comp_sanity_error, placed by designated
 * initializers so that an LSE_* code indexes its own text.
 * llapi_layout_sanity_perror() prints these strings, so they are user
 * visible.
 */
3145 const char *const llapi_layout_strerror[] =
3148 [LSE_INCOMPLETE_MIRROR] =
3149 "Incomplete mirror - must go to EOF",
3150 [LSE_ADJACENT_EXTENSION] =
3151 "No adjacent extension space components",
3152 [LSE_INIT_EXTENSION] =
3153 "Cannot apply extension flag to init components",
3156 [LSE_DOM_EXTENSION] =
3157 "DoM components can't be extension space",
3158 [LSE_DOM_EXTENSION_FOLLOWING] =
3159 "DoM components cannot be followed by extension space",
3161 "DoM component should be the first one in a file/mirror",
3162 [LSE_SET_COMP_START] =
3163 "Must set previous component extent before adding next",
3164 [LSE_NOT_ZERO_LENGTH_EXTENDABLE] =
3165 "Extendable component must start out zero-length",
3166 [LSE_END_NOT_GREATER] =
3167 "Component end is before end of previous component",
3168 [LSE_ZERO_LENGTH_NORMAL] =
3169 "Zero length components must be followed by extension",
3170 [LSE_NOT_ADJACENT_PREV] =
3171 "Components not adjacent (end != next->start)",
3172 [LSE_START_GT_END] =
3173 "Component start is > end",
3175 "The component end must be aligned by the stripe size",
3177 "The extension size must be aligned by the stripe size",
3179 "An unknown OST idx is specified",
/*
 * Argument block that llapi_layout_sanity_cb() receives through the
 * iterator's opaque pointer; llapi_layout_sanity() fills it in before
 * walking the components.
 */
3182 struct llapi_layout_sanity_args {
/* file system name written by llapi_search_fsname(); when it is non-empty
 * the callback also looks up the requested OST indices */
3183 char lsa_fsname[MAX_OBD_NAME + 1];
/* copy of the "incomplete" argument of llapi_layout_sanity(); the flag and
 * extent checks that need a complete layout run only when this is false */
3184 bool lsa_incomplete;
/*
 * Mask of the flags accepted on a new component of an FLR layout:
 * llapi_layout_sanity_cb() reports LSE_FLAGS when a component that is not
 * yet on disk carries any flag outside this mask (complete layouts only).
 * The definition continues on the following line.
 */
3190 /* The component flags can be set by users at creation/modification time. */
3191 #define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
/*
 * Per-component callback used by llapi_layout_sanity(): validate the
 * layout's current component against its neighbours in the component list.
 *
 * The checks cover, in this order: mirror completeness, extension and flag
 * rules, DoM rules, extent ordering and stripe-size alignment and, when a
 * file system name is known, the existence of the requested OSTs.  A failed
 * check stores its LSE_* code in the lsa_rc field of the sanity arguments.
 *
 * \param[in] layout	layout being walked; its current component is checked
 * \param[in] arg	struct llapi_layout_sanity_args shared with the caller
 *
 * \retval LLAPI_LAYOUT_ITER_CONT	no problem found in this component
 * \retval LLAPI_LAYOUT_ITER_STOP	error path; errno is left as it was if
 *					non-zero, otherwise set to EINVAL
 */
3195 * When modified, adjust llapi_stripe_param_verify() if needed as well.
3197 static int llapi_layout_sanity_cb(struct llapi_layout *layout,
3200 struct llapi_layout_comp *comp, *next, *prev;
3201 struct llapi_layout_sanity_args *args = arg;
3202 bool first_comp = false;
3204 comp = __llapi_layout_cur_comp(layout);
/* neighbours are taken from the list links; the list head itself is not a
 * component, so a link pointing at it is not converted into an entry */
3210 if (comp->llc_list.prev != &layout->llot_comp_list)
3211 prev = list_entry(comp->llc_list.prev, typeof(*prev),
3216 if (comp->llc_list.next != &layout->llot_comp_list)
3217 next = list_entry(comp->llc_list.next, typeof(*next),
3222 /* Start of zero implies a new mirror */
3223 if (comp->llc_extent.e_start == 0) {
3225 /* Most checks apply only within one mirror, this is an
3227 if (prev && prev->llc_extent.e_end != LUSTRE_EOF) {
3228 args->lsa_rc = LSE_INCOMPLETE_MIRROR;
3235 if (next && next->llc_extent.e_start == 0)
3238 /* Flag sanity checks */
3239 /* No adjacent extension components */
3240 if ((comp->llc_flags & LCME_FL_EXTENSION) && next &&
3241 (next->llc_flags & LCME_FL_EXTENSION)) {
3242 args->lsa_rc = LSE_ADJACENT_EXTENSION;
3246 /* Extension flag cannot be applied to init components and the first
3247 * component of each mirror is automatically init */
3248 if ((comp->llc_flags & LCME_FL_EXTENSION) &&
3249 (comp->llc_flags & LCME_FL_INIT || first_comp)) {
3250 args->lsa_rc = LSE_INIT_EXTENSION;
3254 if (comp->llc_ondisk) {
3255 if (comp->llc_flags & LCME_FL_NEG)
3256 args->lsa_rc = LSE_FLAGS;
3257 } else if (!args->lsa_incomplete) {
3258 if (args->lsa_flr) {
3259 if (comp->llc_flags & ~LCME_USER_COMP_FLAGS)
3260 args->lsa_rc = LSE_FLAGS;
3262 if (comp->llc_flags &
3263 ~(LCME_FL_EXTENSION | LCME_FL_PREF_RW))
3264 args->lsa_rc = LSE_FLAGS;
3270 /* DoM sanity checks */
3271 if (comp->llc_pattern == LLAPI_LAYOUT_MDT ||
3272 comp->llc_pattern == LOV_PATTERN_MDT) {
3273 /* DoM components can't be extension components */
3274 if (comp->llc_flags & LCME_FL_EXTENSION) {
3275 args->lsa_rc = LSE_DOM_EXTENSION;
3278 /* DoM components cannot be followed by an extension comp */
3279 if (next && (next->llc_flags & LCME_FL_EXTENSION)) {
3280 args->lsa_rc = LSE_DOM_EXTENSION_FOLLOWING;
3284 /* DoM should be the first component in a mirror */
3286 args->lsa_rc = LSE_DOM_FIRST;
3292 /* Extent sanity checks */
3293 /* Must set previous component extent before adding another */
3294 if (prev && prev->llc_extent.e_start == 0 &&
3295 prev->llc_extent.e_end == 0) {
3296 args->lsa_rc = LSE_SET_COMP_START;
3300 if (!args->lsa_incomplete) {
3301 /* Components followed by extension space (extendable
3302 * components) must be zero length before initialization.
3303 * (Except for first comp, which will be initialized on
3305 if (next && (next->llc_flags & LCME_FL_EXTENSION) &&
3306 !first_comp && !(comp->llc_flags & LCME_FL_INIT) &&
3307 comp->llc_extent.e_start != comp->llc_extent.e_end) {
3308 args->lsa_rc = LSE_NOT_ZERO_LENGTH_EXTENDABLE;
3312 /* End must come after end of previous comp */
3313 if (prev && comp->llc_extent.e_end < prev->llc_extent.e_end) {
3314 args->lsa_rc = LSE_END_NOT_GREATER;
3318 /* Components not followed by ext space must have length > 0. */
3319 if (comp->llc_extent.e_start == comp->llc_extent.e_end &&
3320 (next == NULL || !(next->llc_flags & LCME_FL_EXTENSION))) {
3321 args->lsa_rc = LSE_ZERO_LENGTH_NORMAL;
3325 /* The component end must be aligned by the stripe size */
/* An extension component is aligned against the stripe size of the
 * component before it.  prev is tested first because an extension component
 * that heads the list with a non-zero start is not rejected by the
 * first-component check above and has no previous entry to read. */
3326 if ((comp->llc_flags & LCME_FL_EXTENSION) && prev &&
3327 (prev->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3328 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3329 comp->llc_extent.e_end % prev->llc_stripe_size) {
3330 args->lsa_rc = LSE_ALIGN_END;
3333 if ((comp->llc_stripe_size * SEL_UNIT_SIZE) %
3334 prev->llc_stripe_size) {
3335 args->lsa_rc = LSE_ALIGN_EXT;
3338 } else if (!(comp->llc_flags & LCME_FL_EXTENSION) &&
3339 (comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3340 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3341 comp->llc_extent.e_end !=
3342 comp->llc_extent.e_start &&
3343 comp->llc_extent.e_end % comp->llc_stripe_size) {
3344 args->lsa_rc = LSE_ALIGN_END;
3350 /* Components must have start == prev->end */
3351 if (prev && comp->llc_extent.e_start != 0 &&
3352 comp->llc_extent.e_start != prev->llc_extent.e_end) {
3353 args->lsa_rc = LSE_NOT_ADJACENT_PREV;
3357 /* Components must have start <= end */
3358 if (comp->llc_extent.e_start > comp->llc_extent.e_end) {
3359 args->lsa_rc = LSE_START_GT_END;
/* OST lookups need the file system name, so they are skipped when the
 * caller did not provide one */
3363 if (args->lsa_fsname[0] != '\0') {
3366 if (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC) {
3367 assert(comp->llc_stripe_count <=
3368 comp->llc_objects_count);
3370 for (i = 0; i < comp->llc_stripe_count && rc == 0; i++){
3371 if (comp->llc_objects[i].l_ost_idx ==
3372 LLAPI_LAYOUT_IDX_MAX) {
3376 rc = llapi_layout_search_ost(
3377 comp->llc_objects[i].l_ost_idx,
3378 comp->llc_pool_name, args->lsa_fsname);
3380 } else if (comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT) {
3381 rc = llapi_layout_search_ost(
3382 comp->llc_stripe_offset,
3383 comp->llc_pool_name, args->lsa_fsname);
3386 args->lsa_rc = LSE_UNKNOWN_OST;
3391 return LLAPI_LAYOUT_ITER_CONT;
3394 errno = errno ? errno : EINVAL;
3395 return LLAPI_LAYOUT_ITER_STOP;
/*
 * Report a layout sanity error as one line on stdout.
 *
 * A code in the range [0, LSE_LAST) is printed with its message from
 * llapi_layout_strerror[]; a negative code, or one that is not below
 * LSE_LAST, is printed as an unrecognized error together with its number.
 *
 * \param[in] error	LSE_* code, e.g. a positive result of
 *			llapi_layout_sanity()
 */
3398 /* Print explanation of layout error */
3399 void llapi_layout_sanity_perror(int error)
3401 if (error >= LSE_LAST || error < 0) {
3402 fprintf(stdout, "Invalid layout, unrecognized error: %d\n",
3405 fprintf(stdout, "Invalid layout: %s\n",
3406 llapi_layout_strerror[error]);
3410 /* Walk a layout and enforce sanity checks that apply to > 1 component
3412 * The core idea here is that of sanity checking individual tokens vs semantic
3414 * We cannot check everything at the individual component level ('token'),
3415 * instead we must check whether or not the full layout has a valid meaning.
3417 * An example of a component level check is "is stripe size valid?". That is
3418 * handled when setting stripe size.
3420 * An example of a layout level check is "are the extents of these components
3421 * valid when adjacent to one another", or "can we set these flags on adjacent
3424 * \param[in] layout component layout list.
3425 * \param[in] fname file the layout to be checked for
3426 * \param[in] incomplete if layout is complete or not - some checks can
3427 * only be done on complete layouts.
3428 * \param[in] flr set when this is called from FLR mirror create
3430 * \retval 0, success, positive: various errors, see
3431 * llapi_layout_sanity_perror, -1, failure
 *
 * The layout's current component (llot_cur_comp) is saved on entry and
 * written back after the component walk.  The file system name of fname is
 * resolved with llapi_search_fsname() and handed to the per-component
 * callback; a lookup failure is logged as "is not on a Lustre filesystem".
3433 int llapi_layout_sanity(struct llapi_layout *layout,
3438 struct llapi_layout_sanity_args args = { { 0 } };
3439 struct llapi_layout_comp *curr;
3445 curr = layout->llot_cur_comp;
3449 /* Make sure we are on a Lustre file system */
3451 rc = llapi_search_fsname(fname, args.lsa_fsname);
3453 llapi_error(LLAPI_MSG_ERROR, rc,
3454 "'%s' is not on a Lustre filesystem",
3463 args.lsa_incomplete = incomplete;
3465 /* When we modify an existing layout, this tells us if it's FLR */
3466 if (mirror_id_of(curr->llc_id) > 0)
3467 args.lsa_flr = true;
/* run llapi_layout_sanity_cb() on the components of the layout */
3470 rc = llapi_layout_comp_iterate(layout,
3471 llapi_layout_sanity_cb,
3473 if (errno == ENOENT)
3476 if (rc != LLAPI_LAYOUT_ITER_CONT)
/* put back the component that was current when we were called */
3479 layout->llot_cur_comp = curr;
/*
 * Look up the extent of the DoM component of a composite layout.
 *
 * The first component is selected and its pattern is read; the extent of
 * that component is then queried with size passed as the last argument of
 * llapi_layout_comp_extent_get() (presumably the extent end - confirm
 * against that function's prototype).
 *
 * What is returned for a NULL or non-composite layout, and for a first
 * component whose pattern is neither LOV_PATTERN_MDT nor LLAPI_LAYOUT_MDT,
 * is not visible in this excerpt - TODO confirm.
 *
 * NOTE(review): the call to llapi_layout_comp_use() with
 * LLAPI_LAYOUT_COMP_USE_FIRST presumably changes the layout's current
 * component; nothing visible here restores the previous one.
 *
 * \param[in]  layout	layout to inspect
 * \param[out] size	filled in by llapi_layout_comp_extent_get()
 */
3484 int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size)
3486 uint64_t pattern, start;
3489 if (!layout || !llapi_layout_is_composite(layout)) {
3494 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
3498 rc = llapi_layout_pattern_get(layout, &pattern);
3502 if (pattern != LOV_PATTERN_MDT && pattern != LLAPI_LAYOUT_MDT) {
3507 rc = llapi_layout_comp_extent_get(layout, &start, size);
/*
 * Return the size of the layout that lcm points to, selected by the magic
 * number at its start:
 *  - LOV_MAGIC_V1 or LOV_MAGIC_V3: the buffer is read as a struct
 *    lov_user_md and the size comes from lov_user_md_size() applied to its
 *    stripe count and magic;
 *  - LOV_MAGIC_FOREIGN: the buffer is read as a struct lov_foreign_md and
 *    lfm_length is returned;
 *  - LOV_MAGIC_COMP_V1: lcm_size is returned.
 * Any other magic takes the failure branch, whose return value is not
 * visible in this excerpt.
 *
 * \param[in] lcm	layout buffer; despite the parameter type it may hold
 *			any of the layout formats listed above
 */
3515 int lov_comp_md_size(struct lov_comp_md_v1 *lcm)
/* plain (non-composite) layout stored in the same buffer */
3517 if (lcm->lcm_magic == LOV_MAGIC_V1 || lcm->lcm_magic == LOV_MAGIC_V3) {
3518 struct lov_user_md *lum = (void *)lcm;
3520 return lov_user_md_size(lum->lmm_stripe_count, lum->lmm_magic);
/* foreign layout: the length is taken from its own header */
3523 if (lcm->lcm_magic == LOV_MAGIC_FOREIGN) {
3524 struct lov_foreign_md *lfm = (void *)lcm;
3526 return lfm->lfm_length;
3529 if (lcm->lcm_magic != LOV_MAGIC_COMP_V1)
3532 return lcm->lcm_size;
/*
 * Fetch the attributes and the layout of a file through an open directory
 * descriptor.
 *
 * get_lmd_info_fd() fills a local struct lov_user_mds_data buffer and the
 * pieces are copied out of it: lmd_flags to *valid, lmd_stx to *statx and
 * lmd_lmmsize bytes of lmd_lmm to lum.
 *
 * \param[in]  dir_fd	open directory descriptor
 * \param[in]  fname	file name relative to the parent directory;
 *			llapi_get_lum_dir_fd() passes NULL
 * \param[out] valid	receives lmd_flags
 * \param[out] statx	receives a copy of lmd_stx
 * \param[out] lum	receives the layout; may be NULL, as the size check
 *			below only applies when it is set
 * \param[in]  lumsize	size of the lum buffer; checked against sizeof(*lum)
 *			before the request and against lmd_lmmsize before the
 *			layout is copied
 */
3535 int llapi_get_lum_file_fd(int dir_fd, const char *fname, __u64 *valid,
3536 lstatx_t *statx, struct lov_user_md *lum,
3539 struct lov_user_mds_data *lmd;
/* scratch buffer: the fixed part of *lmd up to lmd_lmm, followed by 65536
 * bytes for the layout itself */
3540 char buf[65536 + offsetof(typeof(*lmd), lmd_lmm)];
3544 if (lum && lumsize < sizeof(*lum))
3547 /* If a file name is provided, it is relative to the parent directory */
3553 lmd = (struct lov_user_mds_data *)buf;
3554 rc = get_lmd_info_fd(fname, parent_fd, dir_fd, buf, sizeof(buf),
3560 *valid = lmd->lmd_flags;
3563 memcpy(statx, &lmd->lmd_stx, sizeof(*statx));
/* the returned layout has to fit in the caller's buffer before it is
 * copied */
3566 if (lmd->lmd_lmmsize > lumsize)
3568 memcpy(lum, &lmd->lmd_lmm, lmd->lmd_lmmsize);
/*
 * Fetch the attributes and the layout of the directory open as dir_fd.
 *
 * Thin wrapper: calls llapi_get_lum_file_fd() with a NULL file name and
 * passes every other argument, and the result, through unchanged.
 */
3574 int llapi_get_lum_dir_fd(int dir_fd, __u64 *valid, lstatx_t *statx,
3575 struct lov_user_md *lum, size_t lumsize)
3577 return llapi_get_lum_file_fd(dir_fd, NULL, valid, statx, lum, lumsize);
3580 int llapi_get_lum_file(const char *path, __u64 *valid, lstatx_t *statx,
3581 struct lov_user_md *lum, size_t lumsize)
3583 char parent[PATH_MAX];
3590 tmp = strrchr(path, '/');
3592 strncpy(parent, ".", sizeof(parent) - 1);
3595 strncpy(parent, path, tmp - path);
3596 offset = tmp - path - 1;
3597 parent[tmp - path] = 0;
3602 fname += offset + 2;
3604 dir_fd = open(parent, O_RDONLY);
3607 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3611 rc = llapi_get_lum_file_fd(dir_fd, fname, valid, statx, lum, lumsize);
/*
 * Fetch the attributes and the layout of the directory at path.
 *
 * Opens path read-only and forwards the descriptor, together with valid,
 * statx, lum and lumsize, to llapi_get_lum_dir_fd().  A failure is reported
 * through llapi_error() as "cannot open" with the path.
 */
3616 int llapi_get_lum_dir(const char *path, __u64 *valid, lstatx_t *statx,
3617 struct lov_user_md *lum, size_t lumsize)
3622 dir_fd = open(path, O_RDONLY);
3625 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3629 rc = llapi_get_lum_dir_fd(dir_fd, valid, statx, lum, lumsize);