4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the GNU Lesser General Public License
8 * (LGPL) version 2.1 or (at your discretion) any later version.
9 * (LGPL) version 2.1 accompanies this distribution, and is available at
10 * http://www.gnu.org/licenses/lgpl-2.1.html
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
20 * lustre/utils/liblustreapi_layout.c
22 * lustreapi library for layout calls for interacting with the layout of
23 * Lustre files while hiding details of the internal data structures
26 * Copyright (c) 2016, 2017, Intel Corporation.
28 * Author: Ned Bass <bass6@llnl.gov>
38 #include <sys/xattr.h>
39 #include <sys/param.h>
43 #include <libcfs/util/list.h>
44 #include <lustre/lustreapi.h>
45 #include "lustreapi_internal.h"
48 * Layout component, which contains all attributes of a plain
51 struct llapi_layout_comp {
53 uint64_t llc_stripe_size;
54 uint64_t llc_stripe_count;
55 uint64_t llc_stripe_offset;
56 /* Add 1 so user always gets back a null terminated string. */
57 char llc_pool_name[LOV_MAXPOOLNAME + 1];
58 /** Number of objects in llc_objects array if was initialized. */
59 uint32_t llc_objects_count;
60 struct lov_user_ost_data_v1 *llc_objects;
61 /* fields used only for composite layouts */
62 struct lu_extent llc_extent; /* [start, end) of component */
63 uint32_t llc_id; /* unique ID of component */
64 uint32_t llc_flags; /* LCME_FL_* flags */
65 uint64_t llc_timestamp; /* snapshot timestamp */
66 struct list_head llc_list; /* linked to the llapi_layout
72 * An Opaque data type abstracting the layout of a Lustre file.
75 uint32_t llot_magic; /* LLAPI_LAYOUT_MAGIC */
78 bool llot_is_composite;
79 uint16_t llot_mirror_count;
80 /* Cursor pointing to one of the components in llot_comp_list */
81 struct llapi_layout_comp *llot_cur_comp;
82 struct list_head llot_comp_list;
86 * Compute the number of elements in the lmm_objects array of \a lum
87 * with size \a lum_size.
89 * \param[in] lum the struct lov_user_md to check
90 * \param[in] lum_size the number of bytes in \a lum
92 * \retval number of elements in array lum->lmm_objects
94 static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
99 if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
102 if (lum->lmm_magic == __swab32(LOV_MAGIC_V1) ||
103 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
104 magic = __swab32(lum->lmm_magic);
106 magic = lum->lmm_magic;
108 base_size = lov_user_md_size(0, magic);
110 if (lum_size <= base_size)
113 return (lum_size - base_size) / sizeof(lum->lmm_objects[0]);
117 * Byte-swap the fields of struct lov_user_md.
119 * XXX Rather than duplicating swabbing code here, we should eventually
120 * refactor the needed functions in lustre/ptlrpc/pack_generic.c
121 * into a library that can be shared between kernel and user code.
124 llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
126 int i, j, ent_count, obj_count;
127 struct lov_comp_md_v1 *comp_v1 = NULL;
128 struct lov_comp_md_entry_v1 *ent;
129 struct lov_user_ost_data *lod;
131 if (lum->lmm_magic != __swab32(LOV_MAGIC_V1) &&
132 lum->lmm_magic != __swab32(LOV_MAGIC_V3) &&
133 lum->lmm_magic != __swab32(LOV_MAGIC_COMP_V1))
136 if (lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
137 comp_v1 = (struct lov_comp_md_v1 *)lum;
139 if (comp_v1 != NULL) {
140 comp_v1->lcm_magic = __swab32(comp_v1->lcm_magic);
141 comp_v1->lcm_size = __swab32(comp_v1->lcm_size);
142 comp_v1->lcm_layout_gen = __swab32(comp_v1->lcm_layout_gen);
143 comp_v1->lcm_flags = __swab16(comp_v1->lcm_flags);
144 comp_v1->lcm_entry_count = __swab16(comp_v1->lcm_entry_count);
145 ent_count = comp_v1->lcm_entry_count;
150 for (i = 0; i < ent_count; i++) {
151 if (comp_v1 != NULL) {
152 ent = &comp_v1->lcm_entries[i];
153 ent->lcme_id = __swab32(ent->lcme_id);
154 ent->lcme_flags = __swab32(ent->lcme_flags);
155 ent->lcme_timestamp = __swab64(ent->lcme_timestamp);
156 ent->lcme_extent.e_start = __swab64(ent->lcme_extent.e_start);
157 ent->lcme_extent.e_end = __swab64(ent->lcme_extent.e_end);
158 ent->lcme_offset = __swab32(ent->lcme_offset);
159 ent->lcme_size = __swab32(ent->lcme_size);
161 lum = (struct lov_user_md *)((char *)comp_v1 +
163 lum_size = ent->lcme_size;
165 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
167 lum->lmm_magic = __swab32(lum->lmm_magic);
168 lum->lmm_pattern = __swab32(lum->lmm_pattern);
169 lum->lmm_stripe_size = __swab32(lum->lmm_stripe_size);
170 lum->lmm_stripe_count = __swab16(lum->lmm_stripe_count);
171 lum->lmm_stripe_offset = __swab16(lum->lmm_stripe_offset);
173 if (lum->lmm_magic != LOV_MAGIC_V1) {
174 struct lov_user_md_v3 *v3;
175 v3 = (struct lov_user_md_v3 *)lum;
176 lod = v3->lmm_objects;
178 lod = lum->lmm_objects;
181 for (j = 0; j < obj_count; j++)
182 lod[j].l_ost_idx = __swab32(lod[j].l_ost_idx);
187 * (Re-)allocate llc_objects[] to \a num_stripes stripes.
189 * Copy over existing llc_objects[], if any, to the new llc_objects[].
191 * \param[in] comp existing component to be modified
192 * \param[in] new_stripes number of stripes in the re-allocated array
194 * \retval 0 if the objects are re-allocated successfully
195 * \retval -1 on error with errno set
197 static int __llapi_comp_objects_realloc(struct llapi_layout_comp *comp,
198 unsigned int new_stripes)
200 struct lov_user_ost_data_v1 *new_objects;
203 if (new_stripes > LOV_MAX_STRIPE_COUNT) {
208 if (new_stripes == comp->llc_objects_count)
211 if (new_stripes != 0 && new_stripes <= comp->llc_objects_count)
214 new_objects = realloc(comp->llc_objects,
215 sizeof(*new_objects) * new_stripes);
216 if (new_objects == NULL && new_stripes != 0) {
221 for (i = comp->llc_objects_count; i < new_stripes; i++)
222 new_objects[i].l_ost_idx = LLAPI_LAYOUT_IDX_MAX;
224 comp->llc_objects = new_objects;
225 comp->llc_objects_count = new_stripes;
231 * Allocate storage for a llapi_layout_comp with \a num_stripes stripes.
233 * \param[in] num_stripes number of stripes in new layout
235 * \retval valid pointer if allocation succeeds
236 * \retval NULL if allocation fails
238 static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
240 struct llapi_layout_comp *comp;
242 if (num_stripes > LOV_MAX_STRIPE_COUNT) {
247 comp = calloc(1, sizeof(*comp));
253 comp->llc_objects = NULL;
254 comp->llc_objects_count = 0;
256 if (__llapi_comp_objects_realloc(comp, num_stripes) < 0) {
262 comp->llc_pattern = LLAPI_LAYOUT_DEFAULT;
263 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
264 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
265 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
266 comp->llc_pool_name[0] = '\0';
267 comp->llc_extent.e_start = 0;
268 comp->llc_extent.e_end = LUSTRE_EOF;
271 INIT_LIST_HEAD(&comp->llc_list);
277 * Free memory allocated for \a comp
279 * \param[in] comp previously allocated by __llapi_comp_alloc()
281 static void __llapi_comp_free(struct llapi_layout_comp *comp)
283 if (comp->llc_objects != NULL)
284 free(comp->llc_objects);
289 * Free memory allocated for \a layout.
291 * \param[in] layout previously allocated by llapi_layout_alloc()
293 void llapi_layout_free(struct llapi_layout *layout)
295 struct llapi_layout_comp *comp, *n;
300 list_for_each_entry_safe(comp, n, &layout->llot_comp_list, llc_list) {
301 list_del_init(&comp->llc_list);
302 __llapi_comp_free(comp);
308 * Allocate and initialize a llapi_layout structure.
310 * \retval valid llapi_layout pointer on success
311 * \retval NULL if memory allocation fails
313 static struct llapi_layout *__llapi_layout_alloc(void)
315 struct llapi_layout *layout;
317 layout = calloc(1, sizeof(*layout));
318 if (layout == NULL) {
324 layout->llot_magic = LLAPI_LAYOUT_MAGIC;
325 layout->llot_gen = 0;
326 layout->llot_flags = 0;
327 layout->llot_is_composite = false;
328 layout->llot_mirror_count = 1;
329 layout->llot_cur_comp = NULL;
330 INIT_LIST_HEAD(&layout->llot_comp_list);
336 * Allocate and initialize a new plain layout.
338 * \retval valid llapi_layout pointer on success
339 * \retval NULL if memory allocation fails
341 struct llapi_layout *llapi_layout_alloc(void)
343 struct llapi_layout_comp *comp;
344 struct llapi_layout *layout;
346 layout = __llapi_layout_alloc();
350 comp = __llapi_comp_alloc(0);
356 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
357 layout->llot_cur_comp = comp;
363 * Check if the given \a lum_size is large enough to hold the required
366 * \param[in] lum the struct lov_user_md to check
367 * \param[in] lum_size the number of bytes in \a lum
369 * \retval true the \a lum_size is too small
370 * \retval false the \a lum_size is large enough
372 static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
376 if (lum_size < sizeof(lum->lmm_magic))
379 if (lum->lmm_magic == LOV_MAGIC_V1 ||
380 lum->lmm_magic == __swab32(LOV_MAGIC_V1))
381 magic = LOV_MAGIC_V1;
382 else if (lum->lmm_magic == LOV_MAGIC_V3 ||
383 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
384 magic = LOV_MAGIC_V3;
385 else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
386 lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
387 magic = LOV_MAGIC_COMP_V1;
391 if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
392 return lum_size < lov_user_md_size(0, magic);
394 return lum_size < sizeof(struct lov_comp_md_v1);
397 /* Verify if the objects count in lum is consistent with the
398 * stripe count in lum. It applies to regular file only. */
399 static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
401 struct lov_comp_md_v1 *comp_v1 = NULL;
402 int i, ent_count, obj_count;
404 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
405 comp_v1 = (struct lov_comp_md_v1 *)lum;
406 ent_count = comp_v1->lcm_entry_count;
407 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
408 lum->lmm_magic == LOV_MAGIC_V3) {
414 for (i = 0; i < ent_count; i++) {
416 lum = (struct lov_user_md *)((char *)comp_v1 +
417 comp_v1->lcm_entries[i].lcme_offset);
418 lum_size = comp_v1->lcm_entries[i].lcme_size;
420 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
423 if (!(comp_v1->lcm_entries[i].lcme_flags &
424 LCME_FL_INIT) && obj_count != 0)
426 } else if (obj_count != lum->lmm_stripe_count) {
434 * Convert the data from a lov_user_md to a newly allocated llapi_layout.
435 * The caller is responsible for freeing the returned pointer.
437 * \param[in] lov_xattr LOV user metadata xattr to copy data from
438 * \param[in] lov_xattr_size size in bytes of the \a lov_xattr buffer
439 * \param[in] flags flags to control how layout is retrieved
441 * \retval valid llapi_layout pointer on success
442 * \retval NULL if memory allocation fails
444 struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
445 ssize_t lov_xattr_size,
446 enum llapi_layout_get_flags flags)
448 struct lov_user_md *lum = lov_xattr;
449 struct lov_comp_md_v1 *comp_v1 = NULL;
450 struct lov_comp_md_entry_v1 *ent;
451 struct lov_user_md *v1;
452 struct llapi_layout *layout = NULL;
453 struct llapi_layout_comp *comp;
454 int i, ent_count = 0, obj_count;
456 if (lov_xattr == NULL || lov_xattr_size <= 0) {
461 /* Return an error if we got back a partial layout. */
462 if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
467 #if __BYTE_ORDER == __BIG_ENDIAN
468 if (flags & LLAPI_LAYOUT_GET_COPY) {
469 lum = malloc(lov_xattr_size);
474 memcpy(lum, lov_xattr, lov_xattr_size);
478 llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
480 #if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 16, 53, 0)
481 #define LLAPI_LXF_CHECK_OLD 0x0001
482 if (flags & LLAPI_LXF_CHECK_OLD)
483 flags = (flags & ~LLAPI_LXF_CHECK_OLD) | LLAPI_LAYOUT_GET_CHECK;
485 if ((flags & LLAPI_LAYOUT_GET_CHECK) &&
486 !llapi_layout_lum_valid(lum, lov_xattr_size)) {
491 layout = __llapi_layout_alloc();
492 if (layout == NULL) {
497 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
498 comp_v1 = (struct lov_comp_md_v1 *)lum;
499 ent_count = comp_v1->lcm_entry_count;
500 layout->llot_gen = comp_v1->lcm_layout_gen;
501 layout->llot_is_composite = true;
502 layout->llot_mirror_count = comp_v1->lcm_mirror_count + 1;
503 layout->llot_gen = comp_v1->lcm_layout_gen;
504 layout->llot_flags = comp_v1->lcm_flags;
505 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
506 lum->lmm_magic == LOV_MAGIC_V3) {
508 layout->llot_is_composite = false;
510 if (lov_xattr_size <= 0) {
519 if (ent_count == 0) {
524 v1 = (struct lov_user_md *)lum;
525 for (i = 0; i < ent_count; i++) {
526 if (comp_v1 != NULL) {
527 ent = &comp_v1->lcm_entries[i];
528 v1 = (struct lov_user_md *)((char *)comp_v1 +
530 lov_xattr_size = ent->lcme_size;
535 obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
536 comp = __llapi_comp_alloc(obj_count);
541 comp->llc_extent.e_start = ent->lcme_extent.e_start;
542 comp->llc_extent.e_end = ent->lcme_extent.e_end;
543 comp->llc_id = ent->lcme_id;
544 comp->llc_flags = ent->lcme_flags;
545 if (comp->llc_flags & LCME_FL_NOSYNC)
546 comp->llc_timestamp = ent->lcme_timestamp;
548 comp->llc_extent.e_start = 0;
549 comp->llc_extent.e_end = LUSTRE_EOF;
554 if (v1->lmm_pattern == LOV_PATTERN_RAID0)
555 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
556 else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
557 LOV_PATTERN_OVERSTRIPING))
558 comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING;
559 else if (v1->lmm_pattern == LOV_PATTERN_MDT)
560 comp->llc_pattern = LLAPI_LAYOUT_MDT;
562 /* Lustre only supports RAID0, overstriping
565 comp->llc_pattern = v1->lmm_pattern;
567 if (v1->lmm_stripe_size == 0)
568 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
570 comp->llc_stripe_size = v1->lmm_stripe_size;
572 if (v1->lmm_stripe_count == (typeof(v1->lmm_stripe_count))-1)
573 comp->llc_stripe_count = LLAPI_LAYOUT_WIDE;
574 else if (v1->lmm_stripe_count == 0)
575 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
577 comp->llc_stripe_count = v1->lmm_stripe_count;
579 if (v1->lmm_stripe_offset ==
580 (typeof(v1->lmm_stripe_offset))-1)
581 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
583 comp->llc_stripe_offset = v1->lmm_stripe_offset;
585 if (v1->lmm_magic != LOV_USER_MAGIC_V1) {
586 const struct lov_user_md_v3 *lumv3;
587 lumv3 = (struct lov_user_md_v3 *)v1;
588 snprintf(comp->llc_pool_name,
589 sizeof(comp->llc_pool_name),
590 "%s", lumv3->lmm_pool_name);
591 memcpy(comp->llc_objects, lumv3->lmm_objects,
592 obj_count * sizeof(lumv3->lmm_objects[0]));
594 const struct lov_user_md_v1 *lumv1;
595 lumv1 = (struct lov_user_md_v1 *)v1;
596 memcpy(comp->llc_objects, lumv1->lmm_objects,
597 obj_count * sizeof(lumv1->lmm_objects[0]));
601 comp->llc_stripe_offset =
602 comp->llc_objects[0].l_ost_idx;
604 comp->llc_ondisk = true;
605 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
606 layout->llot_cur_comp = comp;
610 if (lum != lov_xattr)
614 llapi_layout_free(layout);
619 __u32 llapi_pattern_to_lov(uint64_t pattern)
624 case LLAPI_LAYOUT_DEFAULT:
625 lov_pattern = LOV_PATTERN_RAID0;
627 case LLAPI_LAYOUT_RAID0:
628 lov_pattern = LOV_PATTERN_RAID0;
630 case LLAPI_LAYOUT_MDT:
631 lov_pattern = LOV_PATTERN_MDT;
633 case LLAPI_LAYOUT_OVERSTRIPING:
634 lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
637 lov_pattern = EINVAL;
644 * Convert the data from a llapi_layout to a newly allocated lov_user_md.
645 * The caller is responsible for freeing the returned pointer.
647 * \param[in] layout the layout to copy from
649 * \retval valid lov_user_md pointer on success
650 * \retval NULL if memory allocation fails or the layout is invalid
652 static struct lov_user_md *
653 llapi_layout_to_lum(const struct llapi_layout *layout)
655 struct llapi_layout_comp *comp;
656 struct lov_comp_md_v1 *comp_v1 = NULL;
657 struct lov_comp_md_entry_v1 *ent;
658 struct lov_user_md *lum = NULL;
663 if (layout == NULL ||
664 list_empty((struct list_head *)&layout->llot_comp_list)) {
669 /* Allocate header of lov_comp_md_v1 if necessary */
670 if (layout->llot_is_composite) {
673 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
676 lum_size = sizeof(*comp_v1) + comp_cnt * sizeof(*ent);
677 lum = calloc(lum_size, 1);
682 comp_v1 = (struct lov_comp_md_v1 *)lum;
683 comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
684 comp_v1->lcm_size = lum_size;
685 comp_v1->lcm_layout_gen = 0;
686 comp_v1->lcm_flags = layout->llot_flags;
687 comp_v1->lcm_entry_count = comp_cnt;
688 comp_v1->lcm_mirror_count = layout->llot_mirror_count - 1;
692 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
693 struct lov_user_md *blob;
696 int i, obj_count = 0;
697 struct lov_user_ost_data *lmm_objects;
698 uint64_t pattern = comp->llc_pattern;
700 if ((pattern & LLAPI_LAYOUT_SPECIFIC) != 0) {
701 if (comp->llc_objects_count <
702 comp->llc_stripe_count) {
706 magic = LOV_USER_MAGIC_SPECIFIC;
707 obj_count = comp->llc_stripe_count;
708 pattern &= ~LLAPI_LAYOUT_SPECIFIC;
709 } else if (strlen(comp->llc_pool_name) != 0) {
710 magic = LOV_USER_MAGIC_V3;
712 magic = LOV_USER_MAGIC_V1;
714 /* All stripes must be specified when the pattern contains
715 * LLAPI_LAYOUT_SPECIFIC */
716 for (i = 0; i < obj_count; i++) {
717 if (comp->llc_objects[i].l_ost_idx ==
718 LLAPI_LAYOUT_IDX_MAX) {
724 blob_size = lov_user_md_size(obj_count, magic);
725 blob = realloc(lum, lum_size + blob_size);
731 comp_v1 = (struct lov_comp_md_v1 *)lum;
732 blob = (struct lov_user_md *)((char *)lum + lum_size);
733 lum_size += blob_size;
736 blob->lmm_magic = magic;
737 blob->lmm_pattern = llapi_pattern_to_lov(pattern);
738 if (blob->lmm_pattern == EINVAL) {
743 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
744 blob->lmm_stripe_size = 0;
746 blob->lmm_stripe_size = comp->llc_stripe_size;
748 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
749 blob->lmm_stripe_count = 0;
750 else if (comp->llc_stripe_count == LLAPI_LAYOUT_WIDE)
751 blob->lmm_stripe_count = LOV_ALL_STRIPES;
753 blob->lmm_stripe_count = comp->llc_stripe_count;
755 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
756 blob->lmm_stripe_offset = -1;
758 blob->lmm_stripe_offset = comp->llc_stripe_offset;
760 if (magic == LOV_USER_MAGIC_V3 ||
761 magic == LOV_USER_MAGIC_SPECIFIC) {
762 struct lov_user_md_v3 *lumv3 =
763 (struct lov_user_md_v3 *)blob;
765 if (comp->llc_pool_name[0] != '\0') {
766 strncpy(lumv3->lmm_pool_name,
768 sizeof(lumv3->lmm_pool_name));
770 memset(lumv3->lmm_pool_name, 0,
771 sizeof(lumv3->lmm_pool_name));
773 lmm_objects = lumv3->lmm_objects;
775 lmm_objects = blob->lmm_objects;
778 for (i = 0; i < obj_count; i++)
779 lmm_objects[i].l_ost_idx =
780 comp->llc_objects[i].l_ost_idx;
782 if (layout->llot_is_composite) {
783 ent = &comp_v1->lcm_entries[ent_idx];
784 ent->lcme_id = comp->llc_id;
785 ent->lcme_flags = comp->llc_flags;
786 if (ent->lcme_flags & LCME_FL_NOSYNC)
787 ent->lcme_timestamp = comp->llc_timestamp;
788 ent->lcme_extent.e_start = comp->llc_extent.e_start;
789 ent->lcme_extent.e_end = comp->llc_extent.e_end;
790 ent->lcme_size = blob_size;
791 ent->lcme_offset = offset;
793 comp_v1->lcm_size += blob_size;
/**
 * Get the parent directory of a path.
 *
 * The path is copied into \a buf (truncated to fit) and cut at its last
 * '/'. A path without any '/' yields ".", and a path whose only '/' is
 * the leading one yields "/". \a buf is always NUL-terminated unless
 * \a size is 0, in which case it is left untouched.
 *
 * \param[in] path	path to get parent of
 * \param[out] buf	buffer in which to store parent path
 * \param[in] size	size in bytes of buffer \a buf
 */
static void get_parent_dir(const char *path, char *buf, size_t size)
{
	char *p;

	if (size == 0)
		return;

	/* snprintf() always terminates, so the search below never runs
	 * past the end of buf even when path had to be truncated. */
	snprintf(buf, size, "%s", path);

	p = strrchr(buf, '/');
	if (p == buf) {
		/* "/name": the parent is the root, not an empty string. */
		buf[1] = '\0';
	} else if (p != NULL) {
		*p = '\0';
	} else if (size >= 2) {
		buf[0] = '.';
		buf[1] = '\0';
	}
}
829 * Substitute unspecified attribute values in \a layout with values
830 * from fs global settings. (lov.stripesize, lov.stripecount,
833 * \param[in] layout layout to inherit values from
834 * \param[in] path file path of the filesystem
836 static void inherit_sys_attributes(struct llapi_layout *layout,
839 struct llapi_layout_comp *comp;
840 unsigned int ssize, scount, soffset;
843 rc = sattr_cache_get_defaults(NULL, path, &scount, &ssize, &soffset);
847 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
848 if (comp->llc_pattern == LLAPI_LAYOUT_DEFAULT)
849 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
850 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
851 comp->llc_stripe_size = ssize;
852 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
853 comp->llc_stripe_count = scount;
854 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
855 comp->llc_stripe_offset = soffset;
860 * Get the current component of \a layout.
862 * \param[in] layout layout to get current component
864 * \retval valid llapi_layout_comp pointer on success
865 * \retval NULL on error
867 static struct llapi_layout_comp *
868 __llapi_layout_cur_comp(const struct llapi_layout *layout)
870 struct llapi_layout_comp *comp;
872 if (layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
876 if (layout->llot_cur_comp == NULL) {
880 /* Verify data consistency */
881 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
882 if (comp == layout->llot_cur_comp)
889 * Test if any attributes of \a layout are specified.
891 * \param[in] layout the layout to check
893 * \retval true any attributes are specified
894 * \retval false all attributes are unspecified
896 static bool is_any_specified(const struct llapi_layout *layout)
898 struct llapi_layout_comp *comp;
900 comp = __llapi_layout_cur_comp(layout);
904 if (layout->llot_is_composite || layout->llot_mirror_count != 1)
907 return comp->llc_pattern != LLAPI_LAYOUT_DEFAULT ||
908 comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT ||
909 comp->llc_stripe_count != LLAPI_LAYOUT_DEFAULT ||
910 comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT ||
911 strlen(comp->llc_pool_name);
915 * Get the striping layout for the file referenced by file descriptor \a fd.
917 * If the filesystem does not support the "lustre." xattr namespace, the
918 * file must be on a non-Lustre filesystem, so set errno to ENOTTY per
919 * convention. If the file has no "lustre.lov" data, the file will
920 * inherit default values, so return a default layout.
922 * If the kernel gives us back less than the expected amount of data,
923 * we fail with errno set to EINTR.
925 * \param[in] fd open file descriptor
926 * \param[in] flags flags to control how layout is retrieved
928 * \retval valid llapi_layout pointer on success
929 * \retval NULL if an error occurs
931 struct llapi_layout *llapi_layout_get_by_fd(int fd,
932 enum llapi_layout_get_flags flags)
935 struct lov_user_md *lum;
936 struct llapi_layout *layout = NULL;
940 lum_len = XATTR_SIZE_MAX;
941 lum = malloc(lum_len);
945 bytes_read = fgetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_len);
946 if (bytes_read < 0) {
947 if (errno == EOPNOTSUPP)
949 else if (errno == ENODATA)
950 layout = llapi_layout_alloc();
954 /* Directories may have a positive non-zero lum->lmm_stripe_count
955 * yet have an empty lum->lmm_objects array. For non-directories the
956 * amount of data returned from the kernel must be consistent
957 * with the stripe count. */
958 if (fstat(fd, &st) < 0)
961 layout = llapi_layout_get_by_xattr(lum, bytes_read,
962 S_ISDIR(st.st_mode) ? 0 : LLAPI_LAYOUT_GET_CHECK);
969 * Get the expected striping layout for a file at \a path.
971 * Substitute expected inherited attribute values for unspecified
972 * attributes. Unspecified attributes may belong to directories and
973 * never-written-to files, and indicate that default values will be
974 * assigned when files are created or first written to. A default value
975 * is inherited from the parent directory if the attribute is specified
976 * there, otherwise it is inherited from the filesystem root.
977 * Unspecified attributes normally have the value LLAPI_LAYOUT_DEFAULT.
979 * The complete \a path need not refer to an existing file or directory,
980 * but some leading portion of it must reside within a lustre filesystem.
981 * A use case for this interface would be to obtain the literal striping
982 * values that would be assigned to a new file in a given directory.
984 * \param[in] path path for which to get the expected layout
986 * \retval valid llapi_layout pointer on success
987 * \retval NULL if an error occurs
989 static struct llapi_layout *llapi_layout_expected(const char *path)
991 struct llapi_layout *path_layout = NULL;
992 char donor_path[PATH_MAX];
997 fd = open(path, O_RDONLY);
998 if (fd < 0 && errno != ENOENT)
1004 path_layout = llapi_layout_get_by_fd(fd, 0);
1010 if (path_layout == NULL) {
1011 if (errno != ENODATA && errno != ENOENT)
1014 path_layout = llapi_layout_alloc();
1015 if (path_layout == NULL)
1019 if (is_any_specified(path_layout)) {
1020 inherit_sys_attributes(path_layout, path);
1024 llapi_layout_free(path_layout);
1026 rc = stat(path, &st);
1027 if (rc < 0 && errno != ENOENT)
1030 /* If path is not a directory or doesn't exist, inherit layout
1031 * from parent directory. */
1032 if ((rc == 0 && !S_ISDIR(st.st_mode)) ||
1033 (rc < 0 && errno == ENOENT)) {
1034 get_parent_dir(path, donor_path, sizeof(donor_path));
1035 path_layout = llapi_layout_get_by_path(donor_path, 0);
1036 if (path_layout != NULL) {
1037 if (is_any_specified(path_layout)) {
1038 inherit_sys_attributes(path_layout, donor_path);
1041 llapi_layout_free(path_layout);
1045 /* Inherit layout from the filesystem root. */
1046 rc = llapi_search_mounts(path, 0, donor_path, NULL);
1049 path_layout = llapi_layout_get_by_path(donor_path, 0);
1050 if (path_layout == NULL)
1053 inherit_sys_attributes(path_layout, donor_path);
1058 * Get the striping layout for the file at \a path.
1060 * If \a flags contains LLAPI_LAYOUT_GET_EXPECTED, substitute
1061 * expected inherited attribute values for unspecified attributes. See
1062 * llapi_layout_expected().
1064 * \param[in] path path for which to get the layout
1065 * \param[in] flags flags to control how layout is retrieved
1067 * \retval valid llapi_layout pointer on success
1068 * \retval NULL if an error occurs
1070 struct llapi_layout *llapi_layout_get_by_path(const char *path,
1071 enum llapi_layout_get_flags flags)
1073 struct llapi_layout *layout = NULL;
1074 bool failed = false;
1079 if (flags & LLAPI_LAYOUT_GET_EXPECTED)
1080 return llapi_layout_expected(path);
1082 /* Always get layout in O_DIRECT */
1083 /* Allow fetching layout even without the key on encrypted files */
1084 open_flags = O_RDONLY | O_DIRECT | O_CIPHERTEXT;
1086 fd = open(path, open_flags);
1088 if (errno != EINVAL || failed)
1090 /* EINVAL is because a directory cannot be opened in O_DIRECT */
1091 open_flags = O_RDONLY | O_CIPHERTEXT;
1096 layout = llapi_layout_get_by_fd(fd, flags);
1105 * Get the layout for the file with FID \a fid in filesystem \a lustre_dir.
1107 * \param[in] lustre_dir path within Lustre filesystem containing \a fid
1108 * \param[in] fid Lustre identifier of file to get layout for
1110 * \retval valid llapi_layout pointer on success
1111 * \retval NULL if an error occurs
1113 struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir,
1114 const struct lu_fid *fid,
1115 enum llapi_layout_get_flags flags)
1119 int saved_msg_level = llapi_msg_get_level();
1120 struct llapi_layout *layout = NULL;
1122 /* Prevent llapi internal routines from writing to console
1123 * while executing this function, then restore previous message
1125 llapi_msg_set_level(LLAPI_MSG_OFF);
1126 fd = llapi_open_by_fid(lustre_dir, fid, O_RDONLY);
1127 llapi_msg_set_level(saved_msg_level);
1132 layout = llapi_layout_get_by_fd(fd, flags);
1141 * Get the stripe count of \a layout.
1143 * \param[in] layout layout to get stripe count from
1144 * \param[out] count integer to store stripe count in
1146 * \retval 0 on success
1147 * \retval -1 if arguments are invalid
1149 int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
1152 struct llapi_layout_comp *comp;
1154 comp = __llapi_layout_cur_comp(layout);
1158 if (count == NULL) {
1163 *count = comp->llc_stripe_count;
1169 * The llapi_layout API functions have these extra validity checks since
1170 * they use intuitively named macros to denote special behavior, whereas
1171 * the old API uses 0 and -1.
1174 bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
1176 return stripe_count == LLAPI_LAYOUT_DEFAULT ||
1177 stripe_count == LLAPI_LAYOUT_WIDE ||
1178 (stripe_count != 0 && stripe_count != -1 &&
1179 llapi_stripe_count_is_valid(stripe_count));
/**
 * Check whether \a ext_size is an acceptable extension size.
 *
 * \param[in] ext_size	value to check
 *
 * \retval true		non-zero, aligned and not too big according to the
 *			llapi_stripe_size_*() helpers
 * \retval false	otherwise
 */
static bool llapi_layout_extension_size_is_valid(uint64_t ext_size)
{
	if (ext_size == 0)
		return false;

	if (!llapi_stripe_size_is_aligned(ext_size))
		return false;

	return !llapi_stripe_size_is_too_big(ext_size);
}
1189 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
1191 return stripe_size == LLAPI_LAYOUT_DEFAULT ||
1192 (stripe_size != 0 &&
1193 llapi_stripe_size_is_aligned(stripe_size) &&
1194 !llapi_stripe_size_is_too_big(stripe_size));
1197 static bool llapi_layout_stripe_index_is_valid(int64_t stripe_index)
1199 return stripe_index == LLAPI_LAYOUT_DEFAULT ||
1200 (stripe_index >= 0 &&
1201 llapi_stripe_index_is_valid(stripe_index));
1205 * Set the stripe count of \a layout.
1207 * \param[in] layout layout to set stripe count in
1208 * \param[in] count value to be set
1210 * \retval 0 on success
1211 * \retval -1 if arguments are invalid
1213 int llapi_layout_stripe_count_set(struct llapi_layout *layout,
1216 struct llapi_layout_comp *comp;
1218 comp = __llapi_layout_cur_comp(layout);
1222 if (!llapi_layout_stripe_count_is_valid(count)) {
1227 comp->llc_stripe_count = count;
1233 * Get the stripe/extension size of \a layout.
1235 * \param[in] layout layout to get stripe size from
1236 * \param[out] size integer to store stripe size in
1237 * \param[in] extension flag if extension size is requested
1239 * \retval 0 on success
1240 * \retval -1 if arguments are invalid
1242 static int layout_stripe_size_get(const struct llapi_layout *layout,
1243 uint64_t *size, bool extension)
1245 struct llapi_layout_comp *comp;
1248 comp = __llapi_layout_cur_comp(layout);
1257 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1258 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1263 *size = comp->llc_stripe_size;
1264 if (comp->llc_flags & LCME_FL_EXTENSION)
1265 *size *= SEL_UNIT_SIZE;
/**
 * Get the stripe size of \a layout.
 *
 * Forwards to layout_stripe_size_get() with the extension flag cleared.
 *
 * \param[in] layout	layout to get stripe size from
 * \param[out] size	integer to store stripe size in
 *
 * \retval		whatever layout_stripe_size_get() returns
 */
int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
				 uint64_t *size)
{
	const bool want_extension = false;

	return layout_stripe_size_get(layout, size, want_extension);
}
/**
 * Get the extension size of \a layout.
 *
 * Forwards to layout_stripe_size_get() with the extension flag set.
 *
 * \param[in] layout	layout to get extension size from
 * \param[out] size	integer to store extension size in
 *
 * \retval		whatever layout_stripe_size_get() returns
 */
int llapi_layout_extension_size_get(const struct llapi_layout *layout,
				    uint64_t *size)
{
	const bool want_extension = true;

	return layout_stripe_size_get(layout, size, want_extension);
}
1283 * Set the stripe/extension size of \a layout.
1285 * \param[in] layout layout to set stripe size in
1286 * \param[in] size value to be set
1287 * \param[in] extension flag if extension size is passed
1289 * \retval 0 on success
1290 * \retval -1 if arguments are invalid
1292 static int layout_stripe_size_set(struct llapi_layout *layout,
1293 uint64_t size, bool extension)
1295 struct llapi_layout_comp *comp;
1298 comp = __llapi_layout_cur_comp(layout);
1302 comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
1303 if ((comp_ext && !extension) || (!comp_ext && extension)) {
1309 size /= SEL_UNIT_SIZE;
1311 if ((comp_ext && !llapi_layout_extension_size_is_valid(size)) ||
1312 (!comp_ext && !llapi_layout_stripe_size_is_valid(size))) {
1317 comp->llc_stripe_size = size;
/**
 * Set the stripe size of \a layout.
 *
 * Forwards to layout_stripe_size_set() with the extension flag cleared.
 *
 * \param[in] layout	layout to set stripe size in
 * \param[in] size	value to be set
 *
 * \retval		whatever layout_stripe_size_set() returns
 */
int llapi_layout_stripe_size_set(struct llapi_layout *layout,
				 uint64_t size)
{
	const bool is_extension = false;

	return layout_stripe_size_set(layout, size, is_extension);
}
/**
 * Set the extension size of \a layout.
 *
 * Forwards to layout_stripe_size_set() with the extension flag set.
 *
 * \param[in] layout	layout to set extension size in
 * \param[in] size	value to be set
 *
 * \retval		whatever layout_stripe_size_set() returns
 */
int llapi_layout_extension_size_set(struct llapi_layout *layout,
				    uint64_t size)
{
	const bool is_extension = true;

	return layout_stripe_size_set(layout, size, is_extension);
}
1334 * Get the RAID pattern of \a layout.
1336 * \param[in] layout layout to get pattern from
1337 * \param[out] pattern integer to store pattern in
1339 * \retval 0 on success
1340 * \retval -1 if arguments are invalid
1342 int llapi_layout_pattern_get(const struct llapi_layout *layout,
1345 struct llapi_layout_comp *comp;
1347 comp = __llapi_layout_cur_comp(layout);
1351 if (pattern == NULL) {
1356 *pattern = comp->llc_pattern;
1362 * Set the pattern of \a layout.
1364 * \param[in] layout layout to set pattern in
1365 * \param[in] pattern value to be set
1367 * \retval 0 on success
1368 * \retval -1 if arguments are invalid or RAID pattern
1371 int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
1373 struct llapi_layout_comp *comp;
1375 comp = __llapi_layout_cur_comp(layout);
1379 if (pattern != LLAPI_LAYOUT_DEFAULT &&
1380 pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
1381 && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
1386 comp->llc_pattern = pattern |
1387 (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC);
1392 static inline int stripe_number_roundup(int stripe_number)
1394 unsigned int round_up = (stripe_number + 8) & ~7;
1395 return round_up > LOV_MAX_STRIPE_COUNT ?
1396 LOV_MAX_STRIPE_COUNT : round_up;
1400 * Set the OST index of stripe number \a stripe_number to \a ost_index.
1402 * If only the starting stripe's OST index is specified, then this can use
1403 * the normal LOV_MAGIC_{V1,V3} layout type. If multiple OST indices are
1404 * given, then allocate an array to hold the list of indices and ensure that
1405 * the LOV_USER_MAGIC_SPECIFIC layout is used when creating the file.
1407 * \param[in] layout layout to set OST index in
1408 * \param[in] stripe_number stripe number to set index for
1409 * \param[in] ost_index the index to set
1411 * \retval 0 on success
1412 * \retval -1 if arguments are invalid or an unsupported stripe number
1413 * was specified, error returned in errno
1415 int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
1418 struct llapi_layout_comp *comp;
1420 comp = __llapi_layout_cur_comp(layout);
1424 if (!llapi_layout_stripe_index_is_valid(ost_index)) {
1429 if (stripe_number == 0 && ost_index == LLAPI_LAYOUT_DEFAULT) {
1430 comp->llc_stripe_offset = ost_index;
1431 comp->llc_pattern &= ~LLAPI_LAYOUT_SPECIFIC;
1432 __llapi_comp_objects_realloc(comp, 0);
1433 } else if (stripe_number >= 0 &&
1434 stripe_number < LOV_MAX_STRIPE_COUNT) {
1435 if (ost_index >= LLAPI_LAYOUT_IDX_MAX) {
1440 /* Preallocate a few more stripes to avoid realloc() overhead.*/
1441 if (__llapi_comp_objects_realloc(comp,
1442 stripe_number_roundup(stripe_number)) < 0)
1445 comp->llc_objects[stripe_number].l_ost_idx = ost_index;
1447 if (stripe_number == 0)
1448 comp->llc_stripe_offset = ost_index;
1450 comp->llc_pattern |= LLAPI_LAYOUT_SPECIFIC;
1452 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT ||
1453 comp->llc_stripe_count <= stripe_number)
1454 comp->llc_stripe_count = stripe_number + 1;
1464 * Get the OST index associated with stripe \a stripe_number.
1466 * Stripes are indexed starting from zero.
1468 * \param[in] layout layout to get index from
1469 * \param[in] stripe_number stripe number to get index for
1470 * \param[out] index integer to store index in
1472 * \retval 0 on success
1473 * \retval -1 if arguments are invalid
1475 int llapi_layout_ost_index_get(const struct llapi_layout *layout,
1476 uint64_t stripe_number, uint64_t *index)
1478 struct llapi_layout_comp *comp;
1480 comp = __llapi_layout_cur_comp(layout);
1484 if (index == NULL) {
1489 if (stripe_number >= comp->llc_stripe_count ||
1490 stripe_number >= comp->llc_objects_count) {
1495 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
1496 *index = LLAPI_LAYOUT_DEFAULT;
1498 *index = comp->llc_objects[stripe_number].l_ost_idx;
1505 * Get the pool name of layout \a layout.
1507 * \param[in] layout layout to get pool name from
1508 * \param[out] dest buffer to store pool name in
1509 * \param[in] n size in bytes of buffer \a dest
1511 * \retval 0 on success
1512 * \retval -1 if arguments are invalid
1514 int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
1517 struct llapi_layout_comp *comp;
1519 comp = __llapi_layout_cur_comp(layout);
1528 strncpy(dest, comp->llc_pool_name, n);
1534 * Set the name of the pool of layout \a layout.
1536 * \param[in] layout layout to set pool name in
1537 * \param[in] pool_name pool name to set
1539 * \retval 0 on success
1540 * \retval -1 if arguments are invalid or pool name is too long
1542 int llapi_layout_pool_name_set(struct llapi_layout *layout,
1543 const char *pool_name)
1545 struct llapi_layout_comp *comp;
1547 comp = __llapi_layout_cur_comp(layout);
1551 if (!llapi_pool_name_is_valid(&pool_name)) {
1556 strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name));
1561 * Open and possibly create a file with a given \a layout.
1563 * If \a layout is NULL this function acts as a simple wrapper for
1564 * open(). By convention, ENOTTY is returned in errno if \a path
1565 * refers to a non-Lustre file.
1567 * \param[in] path name of the file to open
1568 * \param[in] open_flags open() flags
1569 * \param[in] mode permissions to create file, filtered by umask
1570 * \param[in] layout layout to create new file with
1572 * \retval non-negative file descriptor on successful open
1573 * \retval -1 if an error occurred
/* Open \a path with open() and, when \a layout is non-NULL, apply the layout
 * to the new descriptor through the XATTR_LUSTRE_LOV extended attribute. */
1575 int llapi_layout_file_open(const char *path, int open_flags, mode_t mode,
1576 const struct llapi_layout *layout)
1581 struct lov_user_md *lum;
1585 (layout != NULL && layout->llot_magic != LLAPI_LAYOUT_MAGIC)) {
/* The last argument passed to llapi_layout_sanity() is true only when the
 * layout header records more than one mirror. */
1591 rc = llapi_layout_sanity((struct llapi_layout *)layout, false,
1592 !!(layout->llot_mirror_count > 1));
1594 llapi_layout_sanity_perror(rc);
1599 /* Object creation must be postponed until after layout attributes
1600 * have been applied. */
1601 if (layout != NULL && (open_flags & O_CREAT))
1602 open_flags |= O_LOV_DELAY_CREATE;
1604 fd = open(path, open_flags, mode);
/* Without a layout, or when open() failed, there is nothing to apply. */
1606 if (layout == NULL || fd < 0)
/* Convert the layout into the lov_user_md form handed to fsetxattr(). */
1609 lum = llapi_layout_to_lum(layout);
/* The xattr size depends on the magic: a composite layout carries its own
 * size in lcm_size, a SPECIFIC layout is sized through lov_user_md_size()
 * from its stripe count, and any other magic is sized for zero stripes. */
1618 if (lum->lmm_magic == LOV_USER_MAGIC_COMP_V1)
1619 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
1620 else if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC)
1621 lum_size = lov_user_md_size(lum->lmm_stripe_count,
1624 lum_size = lov_user_md_size(0, lum->lmm_magic);
1626 rc = fsetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_size, 0);
/* EOPNOTSUPP is reported to the caller as ENOTTY. */
1635 errno = errno == EOPNOTSUPP ? ENOTTY : errno;
/**
 * Create a file with a given \a layout.
 *
 * Force O_CREAT and O_EXCL flags on so caller is assured that file was
 * created with the given \a layout on successful function return.
 *
 * \param[in] path		name of the file to open
 * \param[in] open_flags	open() flags
 * \param[in] mode		permissions to create new file with
 * \param[in] layout		layout to create new file with
 *
 * \retval		non-negative file descriptor on successful open
 * \retval		-1 if an error occurred
 */
int llapi_layout_file_create(const char *path, int open_flags, int mode,
			     const struct llapi_layout *layout)
{
	const int create_flags = open_flags | O_CREAT | O_EXCL;

	return llapi_layout_file_open(path, create_flags, mode, layout);
}
1661 int llapi_layout_flags_get(struct llapi_layout *layout, uint32_t *flags)
1663 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1668 *flags = layout->llot_flags;
1673 * Set flags to the header of a component layout.
1675 int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
1677 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1682 layout->llot_flags = flags;
/* Select a string according to the FLR state bits of \a flags, i.e. the
 * value of (flags & LCM_FL_FLR_MASK). */
1686 const char *llapi_layout_flags_string(uint32_t flags)
1688 switch (flags & LCM_FL_FLR_MASK) {
1691 case LCM_FL_WRITE_PENDING:
1693 case LCM_FL_SYNC_PENDING:
1700 __u16 llapi_layout_string_flags(char *string)
1702 if (strncmp(string, "ro", strlen(string)) == 0)
1703 return LCM_FL_RDONLY;
1704 if (strncmp(string, "wp", strlen(string)) == 0)
1705 return LCM_FL_WRITE_PENDING;
1706 if (strncmp(string, "sp", strlen(string)) == 0)
1707 return LCM_FL_SYNC_PENDING;
1713 * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
1714 * @count: Mirror count value to be checked.
1716 * This function checks the validity of mirror count.
1718 * Return: true on success or false on failure.
1720 static bool llapi_layout_mirror_count_is_valid(uint16_t count)
1722 return count >= 0 && count <= LUSTRE_MIRROR_COUNT_MAX;
1726 * llapi_layout_mirror_count_get() - Get mirror count from the header of
1728 * @layout: Layout to get mirror count from.
1729 * @count: Returned mirror count value.
1731 * This function gets mirror count from the header of a layout.
1733 * Return: 0 on success or -1 on failure.
1735 int llapi_layout_mirror_count_get(struct llapi_layout *layout,
1738 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1743 *count = layout->llot_mirror_count;
1748 * llapi_layout_mirror_count_set() - Set mirror count to the header of a layout.
1749 * @layout: Layout to set mirror count in.
1750 * @count: Mirror count value to be set.
1752 * This function sets mirror count to the header of a layout.
1754 * Return: 0 on success or -1 on failure.
1756 int llapi_layout_mirror_count_set(struct llapi_layout *layout,
1759 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1764 if (!llapi_layout_mirror_count_is_valid(count)) {
1769 layout->llot_mirror_count = count;
1774 * Fetch the start and end offset of the current layout component.
1776 * \param[in] layout the layout component
1777 * \param[out] start extent start, inclusive
1778 * \param[out] end extent end, exclusive
1780 * \retval 0 on success
1781 * \retval <0 if error occurs
1783 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
1784 uint64_t *start, uint64_t *end)
1786 struct llapi_layout_comp *comp;
1788 comp = __llapi_layout_cur_comp(layout);
1792 if (start == NULL || end == NULL) {
1797 *start = comp->llc_extent.e_start;
1798 *end = comp->llc_extent.e_end;
1804 * Set the layout extent of a layout.
1806 * \param[in] layout the layout to be set
1807 * \param[in] start extent start, inclusive
1808 * \param[in] end extent end, exclusive
1810 * \retval 0 on success
1811 * \retval <0 if error occurs
/* Store \a start and \a end as the extent of the current component and mark
 * the whole layout as composite. */
1813 int llapi_layout_comp_extent_set(struct llapi_layout *layout,
1814 uint64_t start, uint64_t end)
1816 struct llapi_layout_comp *comp;
1818 comp = __llapi_layout_cur_comp(layout);
1827 comp->llc_extent.e_start = start;
1828 comp->llc_extent.e_end = end;
/* giving a component an extent turns the layout into a composite one */
1829 layout->llot_is_composite = true;
1835 * Gets the attribute flags of the current component.
1837 * \param[in] layout the layout component
1838 * \param[out] flags stored the returned component flags
1840 * \retval 0 on success
1841 * \retval <0 if error occurs
1843 int llapi_layout_comp_flags_get(const struct llapi_layout *layout,
1846 struct llapi_layout_comp *comp;
1848 comp = __llapi_layout_cur_comp(layout);
1852 if (flags == NULL) {
1857 *flags = comp->llc_flags;
1863 * Sets the specified flags of the current component leaving other flags as-is.
1865 * \param[in] layout the layout component
1866 * \param[in] flags component flags to be set
1868 * \retval 0 on success
1869 * \retval <0 if error occurs
1871 int llapi_layout_comp_flags_set(struct llapi_layout *layout, uint32_t flags)
1873 struct llapi_layout_comp *comp;
1875 comp = __llapi_layout_cur_comp(layout);
1879 comp->llc_flags |= flags;
1885 * Clears the flags specified in the flags leaving other flags as-is.
1887 * \param[in] layout the layout component
1888 * \param[in] flags component flags to be cleared
1890 * \retval 0 on success
1891 * \retval <0 if error occurs
1893 int llapi_layout_comp_flags_clear(struct llapi_layout *layout,
1896 struct llapi_layout_comp *comp;
1898 comp = __llapi_layout_cur_comp(layout);
1902 comp->llc_flags &= ~flags;
1908 * Fetches the file-unique component ID of the current layout component.
1910 * \param[in] layout the layout component
1911 * \param[out] id stored the returned component ID
1913 * \retval 0 on success
1914 * \retval <0 if error occurs
1916 int llapi_layout_comp_id_get(const struct llapi_layout *layout, uint32_t *id)
1918 struct llapi_layout_comp *comp;
1920 comp = __llapi_layout_cur_comp(layout);
1934 * Return the mirror id of the current layout component.
1936 * \param[in] layout the layout component
1937 * \param[out] id stored the returned mirror ID
1939 * \retval 0 on success
1940 * \retval <0 if error occurs
1942 int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id)
1944 struct llapi_layout_comp *comp;
1946 comp = __llapi_layout_cur_comp(layout);
1955 *id = mirror_id_of(comp->llc_id);
1961 * Adds a component to \a layout, the new component will be added to
1962 * the tail of components list and it'll inherit attributes of existing
1963 * ones. The \a layout will change it's current component pointer to
1964 * the newly added component, and it'll be turned into a composite
1965 * layout if it was not before the adding.
1967 * \param[in] layout existing composite or plain layout
1969 * \retval 0 on success
1970 * \retval <0 if error occurs
1972 int llapi_layout_comp_add(struct llapi_layout *layout)
1974 struct llapi_layout_comp *last, *comp, *new;
1975 bool composite = layout->llot_is_composite;
1977 comp = __llapi_layout_cur_comp(layout);
1981 new = __llapi_comp_alloc(0);
1985 last = list_last_entry(&layout->llot_comp_list, typeof(*last),
1988 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1990 /* We must mark the layout composite for the sanity check, but it may
1991 * not stay that way if the check fails */
1992 layout->llot_is_composite = true;
1993 layout->llot_cur_comp = new;
1995 /* We need to set a temporary non-zero value for "end" when we call
1996 * comp_extent_set, so we use LUSTRE_EOF-1, which is > all allowed
1997 * for the end of the previous component. (If we're adding this
1998 * component, the end of the previous component cannot be EOF.) */
1999 if (llapi_layout_comp_extent_set(layout, last->llc_extent.e_end,
2001 llapi_layout_comp_del(layout);
2002 layout->llot_is_composite = composite;
2009 * Adds a first component of a mirror to \a layout.
2010 * The \a layout will change it's current component pointer to
2011 * the newly added component, and it'll be turned into a composite
2012 * layout if it was not before the adding.
2014 * \param[in] layout existing composite or plain layout
2016 * \retval 0 on success
2017 * \retval <0 if error occurs
2019 int llapi_layout_add_first_comp(struct llapi_layout *layout)
2021 struct llapi_layout_comp *comp, *new;
2023 comp = __llapi_layout_cur_comp(layout);
2027 new = __llapi_comp_alloc(0);
2031 new->llc_extent.e_start = 0;
2033 list_add_tail(&new->llc_list, &layout->llot_comp_list);
2034 layout->llot_cur_comp = new;
2035 layout->llot_is_composite = true;
2041 * Deletes current component from the composite layout. The component
2042 * to be deleted must be the tail of components list, and it can't be
2043 * the only component in the layout.
2045 * \param[in] layout composite layout
2047 * \retval 0 on success
2048 * \retval <0 if error occurs
2050 int llapi_layout_comp_del(struct llapi_layout *layout)
2052 struct llapi_layout_comp *comp;
2054 comp = __llapi_layout_cur_comp(layout);
2058 if (!layout->llot_is_composite) {
2063 /* It must be the tail of the list (for PFL, can be relaxed
2064 * once we get mirrored components) */
2065 if (comp->llc_list.next != &layout->llot_comp_list) {
2069 layout->llot_cur_comp =
2070 list_last_entry(&comp->llc_list, typeof(*comp), llc_list);
2071 if (comp->llc_list.prev == &layout->llot_comp_list)
2072 layout->llot_cur_comp = NULL;
2074 list_del_init(&comp->llc_list);
2075 __llapi_comp_free(comp);
2081 * Move the current component pointer to the component with
2082 * specified component ID.
2084 * \param[in] layout composite layout
2085 * \param[in] id component ID
2087 * \retval =0 : moved successfully
2088 * \retval <0 if error occurs
2090 int llapi_layout_comp_use_id(struct llapi_layout *layout, uint32_t comp_id)
2092 struct llapi_layout_comp *comp;
2094 comp = __llapi_layout_cur_comp(layout);
2096 return -1; /* use previously set errno */
2098 if (!layout->llot_is_composite) {
2103 if (comp_id == LCME_ID_INVAL) {
2108 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
2109 if (comp->llc_id == comp_id) {
2110 layout->llot_cur_comp = comp;
2119 * Move the current component pointer to a specified position.
2121 * \param[in] layout composite layout
2122 * \param[in] pos the position to be moved, it can be:
2123 * LLAPI_LAYOUT_COMP_USE_FIRST: use first component
2124 * LLAPI_LAYOUT_COMP_USE_LAST: use last component
2125 * LLAPI_LAYOUT_COMP_USE_NEXT: use component after current
2126 * LLAPI_LAYOUT_COMP_USE_PREV: use component before current
2128 * \retval =0 : moved successfully
2129 * \retval =1 : at last component with NEXT, at first component with PREV
2130 * \retval <0 if error occurs
/* Re-point layout->llot_cur_comp at the component selected by \a pos. */
2132 int llapi_layout_comp_use(struct llapi_layout *layout,
2133 enum llapi_layout_comp_use pos)
2135 struct llapi_layout_comp *comp, *head, *tail;
2137 comp = __llapi_layout_cur_comp(layout);
/* A non-composite layout is handled separately, with USE_FIRST and USE_LAST
 * singled out. */
2141 if (!layout->llot_is_composite) {
2142 if (pos == LLAPI_LAYOUT_COMP_USE_FIRST ||
2143 pos == LLAPI_LAYOUT_COMP_USE_LAST)
/* head and tail are the first and the last entry of the component list. */
2149 head = list_first_entry(&layout->llot_comp_list, typeof(*head),
2151 tail = list_last_entry(&layout->llot_comp_list, typeof(*tail),
2154 case LLAPI_LAYOUT_COMP_USE_FIRST:
2155 layout->llot_cur_comp = head;
2157 case LLAPI_LAYOUT_COMP_USE_NEXT:
/* NOTE(review): list_first_entry() applied to the current component's own
 * link presumably yields the component linked after it - confirm. */
2162 layout->llot_cur_comp = list_first_entry(&comp->llc_list,
2166 case LLAPI_LAYOUT_COMP_USE_LAST:
2167 layout->llot_cur_comp = tail;
2169 case LLAPI_LAYOUT_COMP_USE_PREV:
/* NOTE(review): likewise, list_last_entry() on the current component's link
 * presumably yields the component linked before it - confirm. */
2174 layout->llot_cur_comp = list_last_entry(&comp->llc_list,
2187 * Add layout component(s) to an existing file.
2189 * \param[in] path The path name of the file
2190 * \param[in] layout The layout component(s) to be added
/* Validate that \a layout can be appended to the layout of the file at
 * \a path, then send it to the file through the XATTR_LUSTRE_LOV".add"
 * extended attribute. */
2192 int llapi_layout_file_comp_add(const char *path,
2193 const struct llapi_layout *layout)
2195 int rc, fd = -1, lum_size, tmp_errno = 0;
2196 struct llapi_layout *existing_layout = NULL;
2197 struct lov_user_md *lum = NULL;
2199 if (path == NULL || layout == NULL ||
2200 layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
2205 fd = open(path, O_RDWR);
/* Fetch the file's present layout so the combined result can be checked. */
2212 existing_layout = llapi_layout_get_by_fd(fd, 0);
2213 if (existing_layout == NULL) {
/* Append the new components to the in-memory copy and sanity-check it. */
2219 rc = llapi_layout_merge(&existing_layout, layout);
2226 rc = llapi_layout_sanity(existing_layout, false, false);
2229 llapi_layout_sanity_perror(rc);
/* Only the components being added (\a layout) are converted and sent. */
2234 lum = llapi_layout_to_lum(layout);
/* The composite header's lcm_size is read below, so the magic is tested
 * against LOV_USER_MAGIC_COMP_V1 first. */
2241 if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) {
2246 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2248 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0);
2258 llapi_layout_free(existing_layout);
2264 * Delete component(s) by the specified component id or component flags
2265 * from an existing file.
2267 * \param[in] path path name of the file
2268 * \param[in] id unique component ID
2269 * \param[in] flags flags: LCME_FL_* or;
2270 * negative flags: (LCME_FL_NEG|LCME_FL_*)
/* Delete component(s) of the file at \a path selected by \a id or by
 * \a flags: the deletion is first replayed on an in-memory copy of the file's
 * layout and sanity-checked, then requested from the file through the
 * XATTR_LUSTRE_LOV".del" extended attribute. */
2272 int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags)
2274 int rc = 0, fd = -1, lum_size, tmp_errno = 0;
2275 struct llapi_layout *layout;
2276 struct llapi_layout_comp *comp, *next;
2277 struct llapi_layout *existing_layout = NULL;
2278 struct lov_user_md *lum = NULL;
2280 if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) {
2285 /* Can only specify ID or flags, not both, not none. */
2286 if ((id != LCME_ID_INVAL && flags != 0) ||
2287 (id == LCME_ID_INVAL && flags == 0)) {
/* Build a one-component template layout covering [0, LUSTRE_EOF) that
 * carries the requested flags; it is what gets sent with the xattr. */
2292 layout = llapi_layout_alloc();
2296 llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF);
2297 comp = __llapi_layout_cur_comp(layout);
2305 comp->llc_flags = flags;
2307 lum = llapi_layout_to_lum(layout);
2313 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2315 fd = open(path, O_RDWR);
2322 existing_layout = llapi_layout_get_by_fd(fd, 0);
2323 if (existing_layout == NULL) {
/* Visit the existing components starting with the last one (USE_LAST) and
 * then moving backwards (USE_PREV); each one is tested against \a id and
 * \a flags before llapi_layout_comp_del() is applied to the in-memory copy. */
2331 while (rc == 0 && existing_layout->llot_cur_comp != NULL) {
2332 rc = llapi_layout_comp_use(existing_layout, comp ?
2333 LLAPI_LAYOUT_COMP_USE_PREV :
2334 LLAPI_LAYOUT_COMP_USE_LAST);
2339 comp = __llapi_layout_cur_comp(existing_layout);
2345 if (id != LCME_ID_INVAL && id != comp->llc_id)
2347 else if ((flags & LCME_FL_NEG) && (flags & comp->llc_flags))
2349 else if (flags && !(flags & comp->llc_flags))
2352 rc = llapi_layout_comp_del(existing_layout);
2353 /* the layout position is moved to previous one, adjust */
/* What remains of the in-memory layout must still pass the sanity check. */
2361 rc = llapi_layout_sanity(existing_layout, false, false);
2364 llapi_layout_sanity_perror(rc);
2369 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0);
2380 llapi_layout_free(layout);
2381 llapi_layout_free(existing_layout);
2387 /* Internal utility function to apply flags for sanity checking */
2388 static void llapi_layout_comp_apply_flags(struct llapi_layout_comp *comp,
2391 if (flags & LCME_FL_NEG)
2392 comp->llc_flags &= ~flags;
2394 comp->llc_flags |= flags;
/* Argument bundle handed to llapi_layout_apply_flags_cb() through the
 * iterator's callback data pointer. */
2397 struct llapi_layout_apply_flags_args {
/* flags to apply; entry i goes with the component ID at the same index */
2399 uint32_t *lfa_flags;
/* Iterator callback: apply to the current component every flags entry whose
 * ID in args->lfa_ids matches the component's ID. */
2405 static int llapi_layout_apply_flags_cb(struct llapi_layout *layout,
2408 struct llapi_layout_apply_flags_args *args = arg;
2409 struct llapi_layout_comp *comp;
2412 comp = __llapi_layout_cur_comp(layout);
2415 return LLAPI_LAYOUT_ITER_STOP;
/* the same ID may appear several times; every matching entry is applied */
2418 for (i = 0; i < args->lfa_count; i++) {
2419 if (comp->llc_id == args->lfa_ids[i])
2420 llapi_layout_comp_apply_flags(comp, args->lfa_flags[i]);
2423 return LLAPI_LAYOUT_ITER_CONT;
2426 /* Apply flags to the layout for sanity checking */
/* Walk every component of \a layout with llapi_layout_comp_iterate() and let
 * llapi_layout_apply_flags_cb() apply the \a count (id, flags) pairs. */
2427 static int llapi_layout_apply_flags(struct llapi_layout *layout, uint32_t *ids,
2428 uint32_t *flags, int count)
2430 struct llapi_layout_apply_flags_args args;
/* both arrays are mandatory and must hold at least one entry */
2433 if (!ids || !flags || count == 0) {
2439 args.lfa_flags = flags;
2440 args.lfa_count = count;
2443 rc = llapi_layout_comp_iterate(layout,
2444 llapi_layout_apply_flags_cb,
/* NOTE(review): ENOENT is presumably what the iteration leaves in errno when
 * it runs past the last component - confirm against llapi_layout_comp_use() */
2446 if (errno == ENOENT)
2449 if (rc != LLAPI_LAYOUT_ITER_CONT)
2455 * Change flags by component ID of components of an existing file.
2456 * The component to be modified is specified by the comp->lcme_id value,
2457 * which must be a unique component ID.
2459 * \param[in] path path name of the file
2460 * \param[in] ids An array of component IDs
2461 * \param[in] flags flags: LCME_FL_* or;
2462 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2463 * \param[in] count Number of elements in ids and flags array
/* Change component flags of the file at \a path: the (id, flags) pairs are
 * validated, replayed on an in-memory copy of the file's layout for a sanity
 * check, then sent through the XATTR_LUSTRE_LOV".set.flags" extended
 * attribute. */
2465 int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
2468 int rc = -1, fd = -1, i, tmp_errno = 0;
2470 struct llapi_layout *existing_layout = NULL;
2471 struct llapi_layout *layout = NULL;
2472 struct llapi_layout_comp *comp;
2473 struct lov_user_md *lum = NULL;
/* Per-entry validation: neither value may be zero, the ID must not exceed
 * LCME_ID_MAX and only bits in LCME_KNOWN_FLAGS are allowed. */
2483 for (i = 0; i < count; i++) {
2484 if (!ids[i] || !flags[i]) {
2489 if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
2494 /* do not allow to set or clear INIT flag */
2495 if (flags[i] & LCME_FL_INIT) {
2501 fd = open(path, O_RDWR);
2508 existing_layout = llapi_layout_get_by_fd(fd, 0);
2509 if (existing_layout == NULL) {
/* Replay the change on the in-memory copy and check that it stays sane. */
2515 if (llapi_layout_apply_flags(existing_layout, ids, flags, count)) {
2521 rc = llapi_layout_sanity(existing_layout, false, false);
2524 llapi_layout_sanity_perror(rc);
/* Build the request: a composite layout holding one component per
 * (id, flags) pair, carrying nothing but that ID and those flags. */
2529 layout = __llapi_layout_alloc();
2530 if (layout == NULL) {
2536 layout->llot_is_composite = true;
2537 for (i = 0; i < count; i++) {
2538 comp = __llapi_comp_alloc(0);
2545 comp->llc_id = ids[i];
2546 comp->llc_flags = flags[i];
2548 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
2549 layout->llot_cur_comp = comp;
2552 lum = llapi_layout_to_lum(layout);
2559 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2561 /* flush cached pages from clients */
2562 rc = llapi_file_flush(fd);
2569 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
2582 llapi_layout_free(existing_layout);
2583 llapi_layout_free(layout);
2589 * Check if the file layout is composite.
2591 * \param[in] layout the file layout to check
2593 * \retval true composite
2594 * \retval false not composite
2596 bool llapi_layout_is_composite(struct llapi_layout *layout)
2598 return layout->llot_is_composite;
2602 * Iterate over every component in @layout and call the callback function @cb.
2604 * \param[in] layout component layout list.
2605 * \param[in] cb callback for each component
2606 * \param[in] cbdata callback data
2608 * \retval < 0 error happens during the iteration
2609 * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error
2610 * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration
/* Call \a cb on each component of \a layout, starting from the first one and
 * advancing with LLAPI_LAYOUT_COMP_USE_NEXT. */
2612 int llapi_layout_comp_iterate(struct llapi_layout *layout,
2613 llapi_layout_iter_cb cb, void *cbdata)
2617 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2622 * make sure on success llapi_layout_comp_use() API returns 0 with
2628 rc = cb(layout, cbdata);
2629 if (rc != LLAPI_LAYOUT_ITER_CONT)
/* Advance to the next component; a return value of 1 means the current
 * component was the last one, which ends the walk successfully. */
2632 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2635 else if (rc == 1) /* reached the last comp */
2636 return LLAPI_LAYOUT_ITER_CONT;
2643 * llapi_layout_merge() - Merge a composite layout into another one.
2644 * @dst_layout: Destination composite layout.
2645 * @src_layout: Source composite layout.
2647 * This function copies all of the components from @src_layout and
2648 * appends them to @dst_layout.
2650 * Return: 0 on success or -1 on failure.
/* Append a copy of every component of \a src_layout to \a *dst_layout,
 * allocating the destination layout first when \a *dst_layout is NULL. */
2652 int llapi_layout_merge(struct llapi_layout **dst_layout,
2653 const struct llapi_layout *src_layout)
2655 struct llapi_layout *new_layout = *dst_layout;
2656 struct llapi_layout_comp *new = NULL;
2657 struct llapi_layout_comp *comp = NULL;
/* a missing or empty source layout is tested for before anything else */
2660 if (src_layout == NULL ||
2661 list_empty((struct list_head *)&src_layout->llot_comp_list))
2664 if (new_layout == NULL) {
2665 new_layout = __llapi_layout_alloc();
2666 if (new_layout == NULL) {
2672 list_for_each_entry(comp, &src_layout->llot_comp_list, llc_list) {
2673 new = __llapi_comp_alloc(0);
/* plain striping attributes are copied field by field */
2679 new->llc_pattern = comp->llc_pattern;
2680 new->llc_stripe_size = comp->llc_stripe_size;
2681 new->llc_stripe_count = comp->llc_stripe_count;
2682 new->llc_stripe_offset = comp->llc_stripe_offset;
2684 if (comp->llc_pool_name[0] != '\0')
2685 strncpy(new->llc_pool_name, comp->llc_pool_name,
2686 sizeof(new->llc_pool_name));
/* copy the per-stripe OST indices, growing the objects array of the new
 * component as needed */
2688 for (i = 0; i < comp->llc_objects_count; i++) {
2689 if (__llapi_comp_objects_realloc(new,
2690 stripe_number_roundup(i)) < 0) {
2692 __llapi_comp_free(new);
2695 new->llc_objects[i].l_ost_idx = \
2696 comp->llc_objects[i].l_ost_idx;
/* composite-only attributes: extent, component ID and flags */
2699 new->llc_objects_count = comp->llc_objects_count;
2700 new->llc_extent.e_start = comp->llc_extent.e_start;
2701 new->llc_extent.e_end = comp->llc_extent.e_end;
2702 new->llc_id = comp->llc_id;
2703 new->llc_flags = comp->llc_flags;
/* link the copy at the tail and make it the current component */
2705 list_add_tail(&new->llc_list, &new_layout->llot_comp_list);
2706 new_layout->llot_cur_comp = new;
2708 new_layout->llot_is_composite = true;
2710 *dst_layout = new_layout;
2713 llapi_layout_free(new_layout);
2718 * Get the last initialized component
2720 * \param[in] layout component layout list.
2723 * \retval -EINVAL not found
2724 * \retval -EISDIR directory layout
/* Make the last component that has LCME_FL_INIT set the current component of
 * \a layout, searching from the tail of the component list. */
2726 int llapi_layout_get_last_init_comp(struct llapi_layout *layout)
2728 struct llapi_layout_comp *comp = NULL, *head = NULL;
2730 if (!layout->llot_is_composite)
2733 head = list_first_entry(&layout->llot_comp_list, typeof(*comp),
/* NOTE(review): a first component with ID 0 and without LCME_FL_INIT is
 * presumably how a directory layout is recognised (see the -EISDIR return
 * value documented above) - confirm. */
2737 if (head->llc_id == 0 && !(head->llc_flags & LCME_FL_INIT))
2741 /* traverse the components from the tail to find the last init one */
2742 comp = list_last_entry(&layout->llot_comp_list, typeof(*comp),
2744 while (comp != head) {
2745 if (comp->llc_flags & LCME_FL_INIT)
2747 comp = list_last_entry(&comp->llc_list, typeof(*comp),
2751 layout->llot_cur_comp = comp;
/* the walk can end on the first component, so its INIT flag is tested too */
2753 return comp->llc_flags & LCME_FL_INIT ? 0 : -EINVAL;
2757 * Interit stripe info from the file's component to the mirror
2759 * \param[in] layout file component layout list.
2760 * \param[in] layout mirro component layout list.
2762 * \retval 0 on success
2763 * \retval -EINVAL on error
2765 int llapi_layout_mirror_inherit(struct llapi_layout *f_layout,
2766 struct llapi_layout *m_layout)
2768 struct llapi_layout_comp *m_comp = NULL;
2769 struct llapi_layout_comp *f_comp = NULL;
2772 f_comp = __llapi_layout_cur_comp(f_layout);
2775 m_comp = __llapi_layout_cur_comp(m_layout);
2779 /* DoM component does not inherit stripe size */
2780 if (m_comp->llc_pattern != LLAPI_LAYOUT_MDT)
2781 m_comp->llc_stripe_size = f_comp->llc_stripe_size;
2782 m_comp->llc_stripe_count = f_comp->llc_stripe_count;
2788 * Find all stale components.
2790 * \param[in] layout component layout list.
2791 * \param[out] comp array of stale component info.
2792 * \param[in] comp_size array size of @comp.
2793 * \param[in] mirror_ids array of mirror IDs; only components
2794 * belonging to these mirrors will be collected.
2795 * \param[in] ids_nr number of entries in the mirror_ids array.
2797 * \retval number of component info collected on success or
2798 * an error code on failure.
/* Walk the components of \a layout from the first one and record every
 * component that has LCME_FL_STALE set into the \a comp array. */
2800 int llapi_mirror_find_stale(struct llapi_layout *layout,
2801 struct llapi_resync_comp *comp, size_t comp_size,
2802 __u16 *mirror_ids, int ids_nr)
2807 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2815 uint64_t start, end;
2817 rc = llapi_layout_comp_flags_get(layout, &flags);
/* only stale components are of interest */
2821 if (!(flags & LCME_FL_STALE))
2824 rc = llapi_layout_mirror_id_get(layout, &mirror_id);
2828 /* the caller only wants stale components from specific
/* look the component's mirror ID up in the caller's list */
2833 for (j = 0; j < ids_nr; j++) {
2834 if (mirror_ids[j] == mirror_id)
2838 /* not in the specified mirror */
2841 } else if (flags & LCME_FL_NOSYNC) {
2842 /* if not specified mirrors, do not resync "nosync"
2847 rc = llapi_layout_comp_id_get(layout, &id);
2851 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2855 /* pack this component into @comp array */
2856 comp[idx].lrc_id = id;
2857 comp[idx].lrc_mirror_id = mirror_id;
2858 comp[idx].lrc_start = start;
2859 comp[idx].lrc_end = end;
/* the number of recorded entries is checked against the array size */
2862 if (idx >= comp_size) {
2868 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
/* a negative rc is returned as is, otherwise the number of entries filled */
2875 return rc < 0 ? rc : idx;
2878 /* locate @layout to a valid component covering file [file_start, file_end) */
/* Walk the components of \a layout from the first one, ignoring stale ones,
 * looking for components whose extents cover the file range starting at
 * \a file_start; \a *endp is advanced to the end of each extent accepted. */
2879 int llapi_mirror_find(struct llapi_layout *layout, uint64_t file_start,
2880 uint64_t file_end, uint64_t *endp)
2882 uint32_t mirror_id = 0;
2885 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2891 uint64_t start, end;
2892 uint32_t flags, id, rid;
2894 rc = llapi_layout_comp_flags_get(layout, &flags);
/* stale components cannot serve as a source */
2898 if (flags & LCME_FL_STALE)
2901 rc = llapi_layout_mirror_id_get(layout, &rid);
2905 rc = llapi_layout_comp_id_get(layout, &id);
2909 rc = llapi_layout_comp_extent_get(layout, &start, &end);
/* the component must contain the current start offset */
2913 if (file_start >= start && file_start < end) {
/* a follow-up component has to belong to the mirror already chosen and to
 * begin exactly where the previous extent ended */
2916 else if (mirror_id != rid || *endp != start)
2919 file_start = *endp = end;
2920 if (end >= file_end)
2925 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
#ifndef NSEC_PER_SEC
# define NSEC_PER_SEC 1000000000UL
#endif
#define ONE_MB 0x100000
/*
 * Return the time elapsed between \a before and \a after.
 *
 * When the nanosecond part of \a after is smaller than that of \a before,
 * one second is borrowed so that the resulting tv_nsec is never negative.
 */
static struct timespec timespec_sub(struct timespec *before,
				    struct timespec *after)
{
	struct timespec delta;
	int borrow = after->tv_nsec < before->tv_nsec;

	delta.tv_sec = after->tv_sec - before->tv_sec;
	delta.tv_nsec = after->tv_nsec - before->tv_nsec;

	if (borrow) {
		delta.tv_sec -= 1;
		delta.tv_nsec += NSEC_PER_SEC;
	}

	return delta;
}
2955 static void stats_log(struct timespec *now, struct timespec *start_time,
2956 ssize_t read_bytes, size_t write_bytes,
2957 off_t file_size_bytes)
2959 struct timespec diff = timespec_sub(start_time, now);
2961 if (file_size_bytes == 0)
2964 if (diff.tv_sec == 0 && diff.tv_nsec == 0)
2967 llapi_printf(LLAPI_MSG_NORMAL,
2968 "- { seconds: %li, rmbps: %5.2g, wmbps: %5.2g, copied: %lu, size: %lu, pct: %lu%% }\n",
2970 (double) read_bytes/((ONE_MB * diff.tv_sec) +
2971 ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)),
2972 (double) write_bytes/((ONE_MB * diff.tv_sec) +
2973 ((ONE_MB * diff.tv_nsec)/NSEC_PER_SEC)),
2975 file_size_bytes/ONE_MB,
2976 ((write_bytes*100)/file_size_bytes));
/**
 * Copy a range of a mirrored file from a source mirror to the components
 * listed in \a comp_array, optionally throttling the write rate and printing
 * progress records.
 *
 * The source mirror for the current position is looked up with
 * llapi_mirror_find() and its next data extent with llapi_mirror_data_seek().
 * Ranges in front of the data extent are punched (or truncated, for a
 * component ending at OBD_OBJECT_EOF) in every overlapping target; data is
 * read with llapi_mirror_read() and written to every overlapping target with
 * llapi_mirror_write().
 *
 * \param[in] fd		file descriptor handed to fstat() and to the
 *				llapi_mirror_*() calls
 * \param[in] layout		layout handed to llapi_mirror_find()
 * \param[in,out] comp_array	target components; lrc_synced of each entry is
 *				updated to tell whether it was resynced
 * \param[in] comp_size		number of entries in \a comp_array
 * \param[in] start		offset the copy starts from
 * \param[in] end		offset limiting the copy; the mirror end is
 *				clamped to it
 * \param[in] stats_interval_sec	if non-zero, minimum number of seconds
 *				between two stats_log() records; one record is
 *				always printed at the end
 * \param[in] bandwidth_bytes_sec	if non-zero, target write rate in bytes
 *				per second, enforced by sleeping
 *
 * \retval	presumably 0 or a negative errno; per the comment at the end of
 *		the body the first error of a partially successful resync is
 *		reported - TODO confirm
 */
2979 int llapi_mirror_resync_many_params(int fd, struct llapi_layout *layout,
2980 struct llapi_resync_comp *comp_array,
2981 int comp_size, uint64_t start,
2983 unsigned long stats_interval_sec,
2984 unsigned long bandwidth_bytes_sec)
2986 size_t page_size = sysconf(_SC_PAGESIZE);
2987 size_t buflen = 64 << 20; /* 64M */
2989 uint64_t pos = start;
2990 uint64_t data_off = pos, data_end = pos;
2991 uint64_t mirror_end = LUSTRE_EOF;
2996 struct timespec start_time;
2997 struct timespec now;
2998 struct timespec last_bw_print;
2999 size_t total_bytes_read = 0;
3000 size_t total_bytes_written = 0;
3001 off_t write_estimation_bytes = 0;
/*
 * The total amount to write is estimated as the file size times the number of
 * target components; stats_log() uses it for the progress percentage.
 */
3003 if (bandwidth_bytes_sec > 0 || stats_interval_sec) {
3006 rc = fstat(fd, &st);
3009 write_estimation_bytes = st.st_size * comp_size;
3012 /* limit transfer size to what can be sent in one second */
3013 if (bandwidth_bytes_sec && bandwidth_bytes_sec < buflen)
3014 buflen = (bandwidth_bytes_sec + ONE_MB - 1) & ~(ONE_MB - 1);
3015 rc = posix_memalign(&buf, page_size, buflen);
3019 clock_gettime(CLOCK_MONOTONIC, &start_time);
3020 now = last_bw_print = start_time;
/* the current data extent is used up: locate the next one in the source */
3028 if (pos >= data_end) {
3031 if (pos >= mirror_end || !src) {
3032 rc = llapi_mirror_find(layout, pos, end,
3037 /* restrict mirror end by resync end */
3038 mirror_end = MIN(end, mirror_end);
3041 tmp_off = llapi_mirror_data_seek(fd, src, pos,
3044 /* switch to full copy */
3045 to_read = mirror_end - pos;
3049 data_end = data_off + data_size;
3051 data_off = MIN(data_off, mirror_end);
3052 data_end = MIN(data_end, mirror_end);
3054 /* align by page, if there is data block to copy */
3056 data_off &= ~(page_size - 1);
/*
 * [pos, data_off) lies in front of the next data extent: punch or truncate
 * that range in every target component overlapping it.
 */
3059 if (pos < data_off) {
3060 for (i = 0; i < comp_size; i++) {
3063 uint32_t mid = comp_array[i].lrc_mirror_id;
3065 /* skip non-overlapped component */
3066 if (pos >= comp_array[i].lrc_end ||
3067 data_off <= comp_array[i].lrc_start)
3070 if (pos < comp_array[i].lrc_start)
3071 cur_pos = comp_array[i].lrc_start;
3075 if (data_off > comp_array[i].lrc_end)
3076 to_punch = comp_array[i].lrc_end -
3079 to_punch = data_off - cur_pos;
3081 if (comp_array[i].lrc_end == OBD_OBJECT_EOF)
3082 /* the last component can be truncated
3085 rc = llapi_mirror_truncate(fd, mid,
3088 rc = llapi_mirror_punch(fd, mid,
3091 * hole at the end of file, so just truncate up
3094 if (!rc && data_off == data_end && !data_size)
3095 rc = llapi_mirror_truncate(fd,
3097 /* if failed then read failed hole range */
3101 if (pos + to_punch == data_off)
3102 to_read = data_end - pos;
3110 if (pos == mirror_end)
3112 to_read = data_end - pos;
3117 assert(data_end <= mirror_end);
3119 to_read = MIN(buflen, to_read);
/* round the length up to a multiple of page_size (assumed a power of two) */
3120 to_read = ((to_read - 1) | (page_size - 1)) + 1;
3121 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
3122 if (bytes_read == 0) {
3126 if (bytes_read < 0) {
3130 total_bytes_read += bytes_read;
3132 /* round up to page align to make direct IO happy. */
3133 to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
3135 for (i = 0; i < comp_size; i++) {
3136 unsigned long long write_target;
3137 struct timespec diff;
3140 size_t to_write2 = to_write;
3142 /* skip non-overlapped component */
3143 if (pos >= comp_array[i].lrc_end ||
3144 pos + to_write <= comp_array[i].lrc_start)
3147 if (pos < comp_array[i].lrc_start)
3148 pos2 = comp_array[i].lrc_start;
3150 to_write2 -= pos2 - pos;
3152 if ((pos + to_write) > comp_array[i].lrc_end)
3153 to_write2 -= pos + to_write -
3154 comp_array[i].lrc_end;
3156 written = llapi_mirror_write(fd,
3157 comp_array[i].lrc_mirror_id,
3162 * this component is not written successfully,
3163 * mark it using its lrc_synced, it is supposed
3164 * to be false before getting here.
3166 * And before this function returns, all
3167 * elements of comp_array will reverse their
3168 * lrc_synced flag to reflect their true
3171 comp_array[i].lrc_synced = true;
3172 llapi_error(LLAPI_MSG_ERROR, written,
3173 "component %u not synced",
3174 comp_array[i].lrc_id);
3179 assert(written == to_write2);
3180 total_bytes_written += written;
/*
 * NOTE(review): if the statement guarded by this check skips the rest of the
 * loop body, the periodic stats_log() further down only runs when a bandwidth
 * limit is set - confirm this is intended.
 */
3182 if (bandwidth_bytes_sec == 0)
3185 clock_gettime(CLOCK_MONOTONIC, &now);
3186 diff = timespec_sub(&start_time, &now);
3187 write_target = ((bandwidth_bytes_sec * diff.tv_sec) +
3188 ((bandwidth_bytes_sec *
3189 diff.tv_nsec)/NSEC_PER_SEC));
3191 if (write_target < total_bytes_written) {
3192 unsigned long long excess;
3193 struct timespec delay = { 0, 0 };
3195 excess = total_bytes_written - write_target;
3200 delay.tv_sec = excess / bandwidth_bytes_sec;
3201 delay.tv_nsec = (excess % bandwidth_bytes_sec) *
3202 NSEC_PER_SEC / bandwidth_bytes_sec;
/*
 * NOTE(review): clock_nanosleep() reports failure by returning a positive
 * error number and does not set errno, so the "rc < 0 && errno == EINTR"
 * condition below never restarts an interrupted sleep; "rc == EINTR" looks
 * like the intended test.
 */
3205 rc = clock_nanosleep(CLOCK_MONOTONIC, 0,
3207 } while (rc < 0 && errno == EINTR);
3210 llapi_error(LLAPI_MSG_ERROR, rc,
3211 "errors: delay for bandwidth control failed: %s\n",
3217 if (stats_interval_sec) {
3218 clock_gettime(CLOCK_MONOTONIC, &now);
3219 if ((total_bytes_written != end - start) &&
3220 (now.tv_sec >= last_bw_print.tv_sec +
3221 stats_interval_sec)) {
3222 stats_log(&now, &start_time,
3224 total_bytes_written,
3225 write_estimation_bytes);
3226 last_bw_print = now;
3236 /* fatal error happens */
3237 for (i = 0; i < comp_size; i++)
3238 comp_array[i].lrc_synced = false;
3242 /* Output at least one log, regardless of stats_interval */
3243 if (stats_interval_sec) {
3244 clock_gettime(CLOCK_MONOTONIC, &now);
3245 stats_log(&now, &start_time, total_bytes_read,
3246 total_bytes_written,
3247 write_estimation_bytes);
3251 * no fatal error happens, each lrc_synced tells whether the component
3252 * has been resync successfully (note: we'd reverse the value to
3253 * reflect its true meaning.
3255 for (i = 0; i < comp_size; i++) {
3256 comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
3257 if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
3258 rc = llapi_mirror_truncate(fd,
3259 comp_array[i].lrc_mirror_id, pos);
3260 /* Ignore truncate error on encrypted file without the
3261 * key if tried on LUSTRE_ENCRYPTION_UNIT_SIZE boundary.
3263 if (rc < 0 && (rc != -ENOKEY ||
3264 pos & ~LUSTRE_ENCRYPTION_MASK))
3265 comp_array[i].lrc_synced = false;
3270 * returns the first error code for partially successful resync if
/**
 * Resync the components in \a comp_array over the given range without
 * periodic statistics and without a bandwidth limit.
 *
 * Thin wrapper around llapi_mirror_resync_many_params(); all arguments are
 * passed through unchanged and its return value is returned as is.
 */
int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
			     struct llapi_resync_comp *comp_array,
			     int comp_size, uint64_t start, uint64_t end)
{
	/* zero disables the respective feature in the _params() variant */
	const unsigned long no_stats_interval = 0;
	const unsigned long no_bandwidth_limit = 0;

	return llapi_mirror_resync_many_params(fd, layout, comp_array,
					       comp_size, start, end,
					       no_stats_interval,
					       no_bandwidth_limit);
}
/*
 * Error codes reported by the layout sanity checks. Each value is used as a
 * designated index into llapi_layout_strerror[].
 */
3284 enum llapi_layout_comp_sanity_error {
3286 LSE_INCOMPLETE_MIRROR,
3287 LSE_ADJACENT_EXTENSION,
3291 LSE_DOM_EXTENSION_FOLLOWING,
3294 LSE_NOT_ZERO_LENGTH_EXTENDABLE,
3295 LSE_END_NOT_GREATER,
3296 LSE_ZERO_LENGTH_NORMAL,
3297 LSE_NOT_ADJACENT_PREV,
3304 const char *const llapi_layout_strerror[] =
3307 [LSE_INCOMPLETE_MIRROR] =
3308 "Incomplete mirror - must go to EOF",
3309 [LSE_ADJACENT_EXTENSION] =
3310 "No adjacent extension space components",
3311 [LSE_INIT_EXTENSION] =
3312 "Cannot apply extension flag to init components",
3315 [LSE_DOM_EXTENSION] =
3316 "DoM components can't be extension space",
3317 [LSE_DOM_EXTENSION_FOLLOWING] =
3318 "DoM components cannot be followed by extension space",
3320 "DoM component should be the first one in a file/mirror",
3321 [LSE_SET_COMP_START] =
3322 "Must set previous component extent before adding next",
3323 [LSE_NOT_ZERO_LENGTH_EXTENDABLE] =
3324 "Extendable component must start out zero-length",
3325 [LSE_END_NOT_GREATER] =
3326 "Component end is before end of previous component",
3327 [LSE_ZERO_LENGTH_NORMAL] =
3328 "Zero length components must be followed by extension",
3329 [LSE_NOT_ADJACENT_PREV] =
3330 "Components not adjacent (end != next->start",
3331 [LSE_START_GT_END] =
3332 "Component start is > end",
3334 "The component end must be aligned by the stripe size",
3336 "The extension size must be aligned by the stripe size",
/*
 * State shared between llapi_layout_sanity() and its per-component callback
 * llapi_layout_sanity_cb().
 */
3339 struct llapi_layout_sanity_args {
/* layout is still being built: checks needing a complete layout are skipped */
3340 bool lsa_incomplete;
3346 /* The component flags can be set by users at creation/modification time. */
/*
 * For components that are not on disk yet, llapi_layout_sanity_cb() reports
 * LSE_FLAGS when a complete FLR layout carries any flag outside this mask.
 */
3347 #define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
3351 * When modified, adjust llapi_stripe_param_verify() if needed as well.
/*
 * Per-component callback: validate the current component of the layout
 * against its neighbours (flags, DoM placement, extent ordering and
 * alignment). The LSE_* code of a violated rule is stored in the lsa_rc field
 * of the struct llapi_layout_sanity_args passed as callback argument.
 *
 * Returns LLAPI_LAYOUT_ITER_CONT when the component passed every check and
 * LLAPI_LAYOUT_ITER_STOP otherwise, in which case errno is set to EINVAL
 * unless it already holds a non-zero value.
 */
3353 static int llapi_layout_sanity_cb(struct llapi_layout *layout,
3356 struct llapi_layout_comp *comp, *next, *prev;
3357 struct llapi_layout_sanity_args *args = arg;
3358 bool first_comp = false;
3360 comp = __llapi_layout_cur_comp(layout);
/* neighbours are looked up only when comp is not at that end of the list */
3366 if (comp->llc_list.prev != &layout->llot_comp_list)
3367 prev = list_last_entry(&comp->llc_list, typeof(*prev),
3372 if (comp->llc_list.next != &layout->llot_comp_list)
3373 next = list_first_entry(&comp->llc_list, typeof(*next),
3378 /* Start of zero implies a new mirror */
3379 if (comp->llc_extent.e_start == 0) {
3381 /* Most checks apply only within one mirror, this is an
3383 if (prev && prev->llc_extent.e_end != LUSTRE_EOF) {
3384 args->lsa_rc = LSE_INCOMPLETE_MIRROR;
3391 if (next && next->llc_extent.e_start == 0)
3394 /* Flag sanity checks */
3395 /* No adjacent extension components */
3396 if ((comp->llc_flags & LCME_FL_EXTENSION) && next &&
3397 (next->llc_flags & LCME_FL_EXTENSION)) {
3398 args->lsa_rc = LSE_ADJACENT_EXTENSION;
3402 /* Extension flag cannot be applied to init components and the first
3403 * component of each mirror is automatically init */
3404 if ((comp->llc_flags & LCME_FL_EXTENSION) &&
3405 (comp->llc_flags & LCME_FL_INIT || first_comp)) {
3406 args->lsa_rc = LSE_INIT_EXTENSION;
/*
 * Allowed flags depend on where the component comes from: on-disk components
 * must not carry LCME_FL_NEG; new components of a complete layout are limited
 * to LCME_USER_COMP_FLAGS for FLR, else to EXTENSION and PREF_RW.
 */
3410 if (comp->llc_ondisk) {
3411 if (comp->llc_flags & LCME_FL_NEG)
3412 args->lsa_rc = LSE_FLAGS;
3413 } else if (!args->lsa_incomplete) {
3414 if (args->lsa_flr) {
3415 if (comp->llc_flags & ~LCME_USER_COMP_FLAGS)
3416 args->lsa_rc = LSE_FLAGS;
3418 if (comp->llc_flags &
3419 ~(LCME_FL_EXTENSION | LCME_FL_PREF_RW))
3420 args->lsa_rc = LSE_FLAGS;
3426 /* DoM sanity checks */
3427 if (comp->llc_pattern == LLAPI_LAYOUT_MDT ||
3428 comp->llc_pattern == LOV_PATTERN_MDT) {
3429 /* DoM components can't be extension components */
3430 if (comp->llc_flags & LCME_FL_EXTENSION) {
3431 args->lsa_rc = LSE_DOM_EXTENSION;
3434 /* DoM components cannot be followed by an extension comp */
3435 if (next && (next->llc_flags & LCME_FL_EXTENSION)) {
3436 args->lsa_rc = LSE_DOM_EXTENSION_FOLLOWING;
3440 /* DoM should be the first component in a mirror */
3442 args->lsa_rc = LSE_DOM_FIRST;
3448 /* Extent sanity checks */
3449 /* Must set previous component extent before adding another */
3450 if (prev && prev->llc_extent.e_start == 0 &&
3451 prev->llc_extent.e_end == 0) {
3452 args->lsa_rc = LSE_SET_COMP_START;
3456 if (!args->lsa_incomplete) {
3457 /* Components followed by extension space (extendable
3458 * components) must be zero length before initialization.
3459 * (Except for first comp, which will be initialized on
3461 if (next && (next->llc_flags & LCME_FL_EXTENSION) &&
3462 !first_comp && !(comp->llc_flags & LCME_FL_INIT) &&
3463 comp->llc_extent.e_start != comp->llc_extent.e_end) {
3464 args->lsa_rc = LSE_NOT_ZERO_LENGTH_EXTENDABLE;
3468 /* End must come after end of previous comp */
3469 if (prev && comp->llc_extent.e_end < prev->llc_extent.e_end) {
3470 args->lsa_rc = LSE_END_NOT_GREATER;
3474 /* Components not followed by ext space must have length > 0. */
3475 if (comp->llc_extent.e_start == comp->llc_extent.e_end &&
3476 (next == NULL || !(next->llc_flags & LCME_FL_EXTENSION))) {
3477 args->lsa_rc = LSE_ZERO_LENGTH_NORMAL;
3481 /* The component end must be aligned by the stripe size */
/*
 * NOTE(review): prev is dereferenced below without the "prev &&" guard the
 * other checks use. Presumably an extension component always has a
 * predecessor because it cannot be the first component of a mirror - confirm
 * that this also holds for the first list entry with a non-zero start.
 */
3482 if ((comp->llc_flags & LCME_FL_EXTENSION) &&
3483 (prev->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3484 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3485 comp->llc_extent.e_end % prev->llc_stripe_size) {
3486 args->lsa_rc = LSE_ALIGN_END;
3489 if ((comp->llc_stripe_size * SEL_UNIT_SIZE) %
3490 prev->llc_stripe_size) {
3491 args->lsa_rc = LSE_ALIGN_EXT;
3494 } else if (!(comp->llc_flags & LCME_FL_EXTENSION) &&
3495 (comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT)) {
3496 if (comp->llc_extent.e_end != LUSTRE_EOF &&
3497 comp->llc_extent.e_end !=
3498 comp->llc_extent.e_start &&
3499 comp->llc_extent.e_end % comp->llc_stripe_size) {
3500 args->lsa_rc = LSE_ALIGN_END;
3506 /* Components must have start == prev->end */
3507 if (prev && comp->llc_extent.e_start != 0 &&
3508 comp->llc_extent.e_start != prev->llc_extent.e_end) {
3509 args->lsa_rc = LSE_NOT_ADJACENT_PREV;
3513 /* Components must have start <= end */
3514 if (comp->llc_extent.e_start > comp->llc_extent.e_end) {
3515 args->lsa_rc = LSE_START_GT_END;
3519 return LLAPI_LAYOUT_ITER_CONT;
/* keep an errno that is already set, otherwise report EINVAL */
3522 errno = errno ? errno : EINVAL;
3523 return LLAPI_LAYOUT_ITER_STOP;
3526 /* Print explanation of layout error */
3527 void llapi_layout_sanity_perror(int error)
3529 if (error >= LSE_LAST || error < 0) {
3530 fprintf(stdout, "Invalid layout, unrecognized error: %d\n",
3533 fprintf(stdout, "Invalid layout: %s\n",
3534 llapi_layout_strerror[error]);
3538 /* Walk a layout and enforce sanity checks that apply to > 1 component
3540 * The core idea here is that of sanity checking individual tokens vs semantic
3542 * We cannot check everything at the individual component level ('token'),
3543 * instead we must check whether or not the full layout has a valid meaning.
3545 * An example of a component level check is "is stripe size valid?". That is
3546 * handled when setting stripe size.
3548 * An example of a layout level check is "are the extents of these components
3549 * valid when adjacent to one another", or "can we set these flags on adjacent
3552 * \param[in] layout component layout list.
3553 * \param[in] fname file the layout to be checked for
3554 * \param[in] incomplete if layout is complete or not - some checks can
3555 * only be done on complete layouts.
3556 * \param[in] flr set when this is called from FLR mirror create
3558 * \retval 0 on success, a positive LSE_* error code if the layout is
3559 * invalid (see llapi_layout_sanity_perror()), -1 on failure
3561 int llapi_layout_sanity(struct llapi_layout *layout,
3565 struct llapi_layout_sanity_args args = { 0 };
3566 struct llapi_layout_comp *curr;
/* remember the caller's current component; it is restored after the walk */
3572 curr = layout->llot_cur_comp;
3579 args.lsa_incomplete = incomplete;
3581 /* When we modify an existing layout, this tells us if it's FLR */
3582 if (mirror_id_of(curr->llc_id) > 0)
3583 args.lsa_flr = true;
/* run llapi_layout_sanity_cb() on the components of the layout */
3586 rc = llapi_layout_comp_iterate(layout,
3587 llapi_layout_sanity_cb,
/* presumably ENOENT left behind by the iteration is not an error - confirm */
3589 if (errno == ENOENT)
3592 if (rc != LLAPI_LAYOUT_ITER_CONT)
3595 layout->llot_cur_comp = curr;
/*
 * Fetch the extent of the first component of a composite layout into *size
 * when that component uses the MDT (Data-on-MDT) pattern.
 *
 * The first component is selected with llapi_layout_comp_use(), its pattern
 * read with llapi_layout_pattern_get() and its extent with
 * llapi_layout_comp_extent_get(), which stores the extent end in *size.
 * NULL or non-composite layouts and non-MDT patterns take separate branches
 * whose bodies are presumably "size 0, success" - TODO confirm.
 */
3600 int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size)
3602 uint64_t pattern, start;
3605 if (!layout || !llapi_layout_is_composite(layout)) {
3610 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
3614 rc = llapi_layout_pattern_get(layout, &pattern);
/* both the on-disk and the llapi encoding of the MDT pattern are accepted */
3618 if (pattern != LOV_PATTERN_MDT && pattern != LLAPI_LAYOUT_MDT) {
3623 rc = llapi_layout_comp_extent_get(layout, &start, size);
/*
 * Return the size in bytes of the layout blob that lcm points to.
 *
 * The blob is dispatched on its magic: plain V1/V3 layouts are sized with
 * lov_user_md_size() from their stripe count, foreign layouts report
 * lfm_length and composite (COMP_V1) layouts report lcm_size. Any other magic
 * takes a separate branch, presumably an error return - TODO confirm.
 */
3631 int lov_comp_md_size(struct lov_comp_md_v1 *lcm)
3633 if (lcm->lcm_magic == LOV_MAGIC_V1 || lcm->lcm_magic == LOV_MAGIC_V3) {
/* the buffer really holds a plain lov_user_md, reinterpret it as such */
3634 struct lov_user_md *lum = (void *)lcm;
3636 return lov_user_md_size(lum->lmm_stripe_count, lum->lmm_magic);
3639 if (lcm->lcm_magic == LOV_MAGIC_FOREIGN) {
3640 struct lov_foreign_md *lfm = (void *)lcm;
3642 return lfm->lfm_length;
3645 if (lcm->lcm_magic != LOV_MAGIC_COMP_V1)
3648 return lcm->lcm_size;
/*
 * Fetch attributes and/or the striping of a file through get_lmd_info_fd()
 * and copy the requested parts to the caller's buffers.
 *
 * The data is first gathered in a local buffer with room for a 64 KiB layout
 * behind the lov_user_mds_data header, then lmd_flags is copied to *valid,
 * the statx data to *statx and the layout to *lum. A lum buffer smaller than
 * struct lov_user_md, or smaller than the returned layout (lmd_lmmsize), takes
 * a separate branch, presumably an error return - TODO confirm.
 */
3651 int llapi_get_lum_file_fd(int dir_fd, const char *fname, __u64 *valid,
3652 lstatx_t *statx, struct lov_user_md *lum,
3655 struct lov_user_mds_data *lmd;
3656 char buf[65536 + offsetof(typeof(*lmd), lmd_lmm)];
3660 if (lum && lumsize < sizeof(*lum))
3663 /* If a file name is provided, it is relative to the parent directory */
3669 lmd = (struct lov_user_mds_data *)buf;
3670 rc = get_lmd_info_fd(fname, parent_fd, dir_fd, buf, sizeof(buf),
3676 *valid = lmd->lmd_flags;
3679 memcpy(statx, &lmd->lmd_stx, sizeof(*statx));
/* never copy more layout bytes than the caller's buffer can hold */
3682 if (lmd->lmd_lmmsize > lumsize)
3684 memcpy(lum, &lmd->lmd_lmm, lmd->lmd_lmmsize);
3690 int llapi_get_lum_dir_fd(int dir_fd, __u64 *valid, lstatx_t *statx,
3691 struct lov_user_md *lum, size_t lumsize)
3693 return llapi_get_lum_file_fd(dir_fd, NULL, valid, statx, lum, lumsize);
3696 int llapi_get_lum_file(const char *path, __u64 *valid, lstatx_t *statx,
3697 struct lov_user_md *lum, size_t lumsize)
3699 char parent[PATH_MAX];
3706 tmp = strrchr(path, '/');
3708 strncpy(parent, ".", sizeof(parent) - 1);
3711 strncpy(parent, path, tmp - path);
3712 offset = tmp - path - 1;
3713 parent[tmp - path] = 0;
3718 fname += offset + 2;
3720 dir_fd = open(parent, O_RDONLY);
3723 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3727 rc = llapi_get_lum_file_fd(dir_fd, fname, valid, statx, lum, lumsize);
/*
 * Path-based variant of llapi_get_lum_dir_fd(): open the directory at path
 * read-only and query it through llapi_get_lum_dir_fd(). A failing open() is
 * logged with llapi_error(); presumably the error is then returned - TODO
 * confirm.
 */
3732 int llapi_get_lum_dir(const char *path, __u64 *valid, lstatx_t *statx,
3733 struct lov_user_md *lum, size_t lumsize)
3738 dir_fd = open(path, O_RDONLY);
3741 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
3745 rc = llapi_get_lum_dir_fd(dir_fd, valid, statx, lum, lumsize);