4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the GNU Lesser General Public License
8 * (LGPL) version 2.1 or (at your discretion) any later version.
9 * (LGPL) version 2.1 accompanies this distribution, and is available at
10 * http://www.gnu.org/licenses/lgpl-2.1.html
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
20 * lustre/utils/liblustreapi_layout.c
22 * lustreapi library for layout calls for interacting with the layout of
23 * Lustre files while hiding details of the internal data structures
26 * Copyright (c) 2016, 2017, Intel Corporation.
28 * Author: Ned Bass <bass6@llnl.gov>
38 #include <sys/xattr.h>
39 #include <sys/param.h>
41 #include <libcfs/util/list.h>
42 #include <lustre/lustreapi.h>
43 #include "lustreapi_internal.h"
46 * Layout component, which contains all attributes of a plain
49 struct llapi_layout_comp {
51 uint64_t llc_stripe_size;
52 uint64_t llc_stripe_count;
53 uint64_t llc_stripe_offset;
54 /* Add 1 so user always gets back a null terminated string. */
55 char llc_pool_name[LOV_MAXPOOLNAME + 1];
56 /** Number of objects in llc_objects array if was initialized. */
57 uint32_t llc_objects_count;
58 struct lov_user_ost_data_v1 *llc_objects;
59 /* fields used only for composite layouts */
60 struct lu_extent llc_extent; /* [start, end) of component */
61 uint32_t llc_id; /* unique ID of component */
62 uint32_t llc_flags; /* LCME_FL_* flags */
63 uint64_t llc_timestamp; /* snapshot timestamp */
64 struct list_head llc_list; /* linked to the llapi_layout
69 * An Opaque data type abstracting the layout of a Lustre file.
72 uint32_t llot_magic; /* LLAPI_LAYOUT_MAGIC */
75 bool llot_is_composite;
76 uint16_t llot_mirror_count;
77 /* Cursor pointing to one of the components in llot_comp_list */
78 struct llapi_layout_comp *llot_cur_comp;
79 struct list_head llot_comp_list;
83 * Compute the number of elements in the lmm_objects array of \a lum
84 * with size \a lum_size.
86 * \param[in] lum the struct lov_user_md to check
87 * \param[in] lum_size the number of bytes in \a lum
89 * \retval number of elements in array lum->lmm_objects
91 static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
96 if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
99 if (lum->lmm_magic == __swab32(LOV_MAGIC_V1) ||
100 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
101 magic = __swab32(lum->lmm_magic);
103 magic = lum->lmm_magic;
105 base_size = lov_user_md_size(0, magic);
107 if (lum_size <= base_size)
110 return (lum_size - base_size) / sizeof(lum->lmm_objects[0]);
114 * Byte-swap the fields of struct lov_user_md.
116 * XXX Rather than duplicating swabbing code here, we should eventually
117 * refactor the needed functions in lustre/ptlrpc/pack_generic.c
118 * into a library that can be shared between kernel and user code.
121 llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
123 int i, j, ent_count, obj_count;
124 struct lov_comp_md_v1 *comp_v1 = NULL;
125 struct lov_comp_md_entry_v1 *ent;
126 struct lov_user_ost_data *lod;
128 if (lum->lmm_magic != __swab32(LOV_MAGIC_V1) &&
129 lum->lmm_magic != __swab32(LOV_MAGIC_V3) &&
130 lum->lmm_magic != __swab32(LOV_MAGIC_COMP_V1))
133 if (lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
134 comp_v1 = (struct lov_comp_md_v1 *)lum;
136 if (comp_v1 != NULL) {
137 __swab32s(&comp_v1->lcm_magic);
138 __swab32s(&comp_v1->lcm_size);
139 __swab32s(&comp_v1->lcm_layout_gen);
140 __swab16s(&comp_v1->lcm_flags);
141 __swab16s(&comp_v1->lcm_entry_count);
142 ent_count = comp_v1->lcm_entry_count;
147 for (i = 0; i < ent_count; i++) {
148 if (comp_v1 != NULL) {
149 ent = &comp_v1->lcm_entries[i];
150 __swab32s(&ent->lcme_id);
151 __swab32s(&ent->lcme_flags);
152 __swab64s(&ent->lcme_timestamp);
153 __swab64s(&ent->lcme_extent.e_start);
154 __swab64s(&ent->lcme_extent.e_end);
155 __swab32s(&ent->lcme_offset);
156 __swab32s(&ent->lcme_size);
158 lum = (struct lov_user_md *)((char *)comp_v1 +
160 lum_size = ent->lcme_size;
162 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
164 __swab32s(&lum->lmm_magic);
165 __swab32s(&lum->lmm_pattern);
166 __swab32s(&lum->lmm_stripe_size);
167 __swab16s(&lum->lmm_stripe_count);
168 __swab16s(&lum->lmm_stripe_offset);
170 if (lum->lmm_magic != LOV_MAGIC_V1) {
171 struct lov_user_md_v3 *v3;
172 v3 = (struct lov_user_md_v3 *)lum;
173 lod = v3->lmm_objects;
175 lod = lum->lmm_objects;
178 for (j = 0; j < obj_count; j++)
179 __swab32s(&lod[j].l_ost_idx);
184 * (Re-)allocate llc_objects[] to \a num_stripes stripes.
186 * Copy over existing llc_objects[], if any, to the new llc_objects[].
188 * \param[in] comp existing layout component to be modified
189 * \param[in] new_stripes number of stripes the component should hold
191 * \retval 0 if the objects are re-allocated successfully
192 * \retval -1 on error with errno set
194 static int __llapi_comp_objects_realloc(struct llapi_layout_comp *comp,
195 unsigned int new_stripes)
197 struct lov_user_ost_data_v1 *new_objects;
200 if (new_stripes > LOV_MAX_STRIPE_COUNT) {
205 if (new_stripes == comp->llc_objects_count)
208 if (new_stripes != 0 && new_stripes <= comp->llc_objects_count)
211 new_objects = realloc(comp->llc_objects,
212 sizeof(*new_objects) * new_stripes);
213 if (new_objects == NULL && new_stripes != 0) {
218 for (i = comp->llc_objects_count; i < new_stripes; i++)
219 new_objects[i].l_ost_idx = LLAPI_LAYOUT_IDX_MAX;
221 comp->llc_objects = new_objects;
222 comp->llc_objects_count = new_stripes;
228 * Allocate storage for a llapi_layout_comp with \a num_stripes stripes.
230 * \param[in] num_stripes number of stripes in new layout
232 * \retval valid pointer if allocation succeeds
233 * \retval NULL if allocation fails
235 static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
237 struct llapi_layout_comp *comp;
239 if (num_stripes > LOV_MAX_STRIPE_COUNT) {
244 comp = calloc(1, sizeof(*comp));
250 comp->llc_objects = NULL;
251 comp->llc_objects_count = 0;
253 if (__llapi_comp_objects_realloc(comp, num_stripes) < 0) {
259 comp->llc_pattern = LLAPI_LAYOUT_DEFAULT;
260 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
261 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
262 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
263 comp->llc_pool_name[0] = '\0';
264 comp->llc_extent.e_start = 0;
265 comp->llc_extent.e_end = LUSTRE_EOF;
268 INIT_LIST_HEAD(&comp->llc_list);
274 * Free memory allocated for \a comp
276 * \param[in] comp previously allocated by __llapi_comp_alloc()
278 static void __llapi_comp_free(struct llapi_layout_comp *comp)
280 if (comp->llc_objects != NULL)
281 free(comp->llc_objects);
286 * Free memory allocated for \a layout.
288 * \param[in] layout previously allocated by llapi_layout_alloc()
290 void llapi_layout_free(struct llapi_layout *layout)
292 struct llapi_layout_comp *comp, *n;
297 list_for_each_entry_safe(comp, n, &layout->llot_comp_list, llc_list) {
298 list_del_init(&comp->llc_list);
299 __llapi_comp_free(comp);
305 * Allocate and initialize a llapi_layout structure.
307 * \retval valid llapi_layout pointer on success
308 * \retval NULL if memory allocation fails
310 static struct llapi_layout *__llapi_layout_alloc(void)
312 struct llapi_layout *layout;
314 layout = calloc(1, sizeof(*layout));
315 if (layout == NULL) {
321 layout->llot_magic = LLAPI_LAYOUT_MAGIC;
322 layout->llot_gen = 0;
323 layout->llot_flags = 0;
324 layout->llot_is_composite = false;
325 layout->llot_mirror_count = 1;
326 layout->llot_cur_comp = NULL;
327 INIT_LIST_HEAD(&layout->llot_comp_list);
333 * Allocate and initialize a new plain layout.
335 * \retval valid llapi_layout pointer on success
336 * \retval NULL if memory allocation fails
338 struct llapi_layout *llapi_layout_alloc(void)
340 struct llapi_layout_comp *comp;
341 struct llapi_layout *layout;
343 layout = __llapi_layout_alloc();
347 comp = __llapi_comp_alloc(0);
353 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
354 layout->llot_cur_comp = comp;
360 * Check if the given \a lum_size is large enough to hold the required
363 * \param[in] lum the struct lov_user_md to check
364 * \param[in] lum_size the number of bytes in \a lum
366 * \retval true the \a lum_size is too small
367 * \retval false the \a lum_size is large enough
369 static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
373 if (lum_size < sizeof(lum->lmm_magic))
376 if (lum->lmm_magic == LOV_MAGIC_V1 ||
377 lum->lmm_magic == __swab32(LOV_MAGIC_V1))
378 magic = LOV_MAGIC_V1;
379 else if (lum->lmm_magic == LOV_MAGIC_V3 ||
380 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
381 magic = LOV_MAGIC_V3;
382 else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
383 lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
384 magic = LOV_MAGIC_COMP_V1;
388 if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
389 return lum_size < lov_user_md_size(0, magic);
391 return lum_size < sizeof(struct lov_comp_md_v1);
394 /* Verify if the objects count in lum is consistent with the
395 * stripe count in lum. It applies to regular file only. */
396 static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
398 struct lov_comp_md_v1 *comp_v1 = NULL;
399 int i, ent_count, obj_count;
401 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
402 comp_v1 = (struct lov_comp_md_v1 *)lum;
403 ent_count = comp_v1->lcm_entry_count;
404 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
405 lum->lmm_magic == LOV_MAGIC_V3) {
411 for (i = 0; i < ent_count; i++) {
413 lum = (struct lov_user_md *)((char *)comp_v1 +
414 comp_v1->lcm_entries[i].lcme_offset);
415 lum_size = comp_v1->lcm_entries[i].lcme_size;
417 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
420 if (!(comp_v1->lcm_entries[i].lcme_flags &
421 LCME_FL_INIT) && obj_count != 0)
423 } else if (obj_count != lum->lmm_stripe_count) {
431 * Convert the data from a lov_user_md to a newly allocated llapi_layout.
432 * The caller is responsible for freeing the returned pointer.
434 * \param[in] lov_xattr LOV user metadata xattr to copy data from
435 * \param[in] lov_xattr_size size in bytes of \a lov_xattr
436 * \param[in] flags bitwise-or'd flags to control the behavior
438 * \retval valid llapi_layout pointer on success
439 * \retval NULL if memory allocation fails
441 struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
442 ssize_t lov_xattr_size,
445 struct lov_user_md *lum = lov_xattr;
446 struct lov_comp_md_v1 *comp_v1 = NULL;
447 struct lov_comp_md_entry_v1 *ent;
448 struct lov_user_md *v1;
449 struct llapi_layout *layout = NULL;
450 struct llapi_layout_comp *comp;
451 int i, ent_count = 0, obj_count;
453 if (lov_xattr == NULL || lov_xattr_size <= 0) {
458 /* Return an error if we got back a partial layout. */
459 if (llapi_layout_lum_truncated(lov_xattr, lov_xattr_size)) {
464 #if __BYTE_ORDER == __BIG_ENDIAN
465 if (flags & LLAPI_LXF_COPY) {
466 lum = malloc(lov_xattr_size);
471 memcpy(lum, lov_xattr, lov_xattr_size);
475 llapi_layout_swab_lov_user_md(lum, lov_xattr_size);
477 if ((flags & LLAPI_LXF_CHECK) &&
478 !llapi_layout_lum_valid(lum, lov_xattr_size)) {
483 layout = __llapi_layout_alloc();
484 if (layout == NULL) {
489 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
490 comp_v1 = (struct lov_comp_md_v1 *)lum;
491 ent_count = comp_v1->lcm_entry_count;
492 layout->llot_gen = comp_v1->lcm_layout_gen;
493 layout->llot_is_composite = true;
494 layout->llot_mirror_count = comp_v1->lcm_mirror_count + 1;
495 layout->llot_gen = comp_v1->lcm_layout_gen;
496 layout->llot_flags = comp_v1->lcm_flags;
497 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
498 lum->lmm_magic == LOV_MAGIC_V3) {
500 layout->llot_is_composite = false;
502 if (lov_xattr_size <= 0) {
511 if (ent_count == 0) {
516 v1 = (struct lov_user_md *)lum;
517 for (i = 0; i < ent_count; i++) {
518 if (comp_v1 != NULL) {
519 ent = &comp_v1->lcm_entries[i];
520 v1 = (struct lov_user_md *)((char *)comp_v1 +
522 lov_xattr_size = ent->lcme_size;
527 obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
528 comp = __llapi_comp_alloc(obj_count);
533 comp->llc_extent.e_start = ent->lcme_extent.e_start;
534 comp->llc_extent.e_end = ent->lcme_extent.e_end;
535 comp->llc_id = ent->lcme_id;
536 comp->llc_flags = ent->lcme_flags;
537 if (comp->llc_flags & LCME_FL_NOSYNC)
538 comp->llc_timestamp = ent->lcme_timestamp;
540 comp->llc_extent.e_start = 0;
541 comp->llc_extent.e_end = LUSTRE_EOF;
546 if (v1->lmm_pattern == LOV_PATTERN_RAID0)
547 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
548 else if (v1->lmm_pattern == LOV_PATTERN_MDT)
549 comp->llc_pattern = LLAPI_LAYOUT_MDT;
551 /* Lustre only supports RAID0 and DoM for now. */
552 comp->llc_pattern = v1->lmm_pattern;
554 if (v1->lmm_stripe_size == 0)
555 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
557 comp->llc_stripe_size = v1->lmm_stripe_size;
559 if (v1->lmm_stripe_count == (typeof(v1->lmm_stripe_count))-1)
560 comp->llc_stripe_count = LLAPI_LAYOUT_WIDE;
561 else if (v1->lmm_stripe_count == 0)
562 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
564 comp->llc_stripe_count = v1->lmm_stripe_count;
566 if (v1->lmm_stripe_offset ==
567 (typeof(v1->lmm_stripe_offset))-1)
568 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
570 comp->llc_stripe_offset = v1->lmm_stripe_offset;
572 if (v1->lmm_magic != LOV_USER_MAGIC_V1) {
573 const struct lov_user_md_v3 *lumv3;
574 lumv3 = (struct lov_user_md_v3 *)v1;
575 snprintf(comp->llc_pool_name,
576 sizeof(comp->llc_pool_name),
577 "%s", lumv3->lmm_pool_name);
578 memcpy(comp->llc_objects, lumv3->lmm_objects,
579 obj_count * sizeof(lumv3->lmm_objects[0]));
581 const struct lov_user_md_v1 *lumv1;
582 lumv1 = (struct lov_user_md_v1 *)v1;
583 memcpy(comp->llc_objects, lumv1->lmm_objects,
584 obj_count * sizeof(lumv1->lmm_objects[0]));
588 comp->llc_stripe_offset =
589 comp->llc_objects[0].l_ost_idx;
591 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
592 layout->llot_cur_comp = comp;
596 if (lum != lov_xattr)
600 llapi_layout_free(layout);
606 * Convert the data from a llapi_layout to a newly allocated lov_user_md.
607 * The caller is responsible for freeing the returned pointer.
609 * \param[in] layout the layout to copy from
611 * \retval valid lov_user_md pointer on success
612 * \retval NULL if memory allocation fails or the layout is invalid
614 static struct lov_user_md *
615 llapi_layout_to_lum(const struct llapi_layout *layout)
617 struct llapi_layout_comp *comp;
618 struct lov_comp_md_v1 *comp_v1 = NULL;
619 struct lov_comp_md_entry_v1 *ent;
620 struct lov_user_md *lum = NULL;
625 if (layout == NULL ||
626 list_empty((struct list_head *)&layout->llot_comp_list)) {
631 /* Allocate header of lov_comp_md_v1 if necessary */
632 if (layout->llot_is_composite) {
635 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
638 lum_size = sizeof(*comp_v1) + comp_cnt * sizeof(*ent);
639 lum = calloc(lum_size, 1);
644 comp_v1 = (struct lov_comp_md_v1 *)lum;
645 comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
646 comp_v1->lcm_size = lum_size;
647 comp_v1->lcm_layout_gen = 0;
648 comp_v1->lcm_flags = layout->llot_flags;
649 comp_v1->lcm_entry_count = comp_cnt;
650 comp_v1->lcm_mirror_count = layout->llot_mirror_count - 1;
654 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
655 struct lov_user_md *blob;
658 int i, obj_count = 0;
659 struct lov_user_ost_data *lmm_objects;
660 uint64_t pattern = comp->llc_pattern;
662 if ((pattern & LLAPI_LAYOUT_SPECIFIC) != 0) {
663 if (comp->llc_objects_count <
664 comp->llc_stripe_count) {
668 magic = LOV_USER_MAGIC_SPECIFIC;
669 obj_count = comp->llc_stripe_count;
670 pattern &= ~LLAPI_LAYOUT_SPECIFIC;
671 } else if (strlen(comp->llc_pool_name) != 0) {
672 magic = LOV_USER_MAGIC_V3;
674 magic = LOV_USER_MAGIC_V1;
676 /* All stripes must be specified when the pattern contains
677 * LLAPI_LAYOUT_SPECIFIC */
678 for (i = 0; i < obj_count; i++) {
679 if (comp->llc_objects[i].l_ost_idx ==
680 LLAPI_LAYOUT_IDX_MAX) {
686 blob_size = lov_user_md_size(obj_count, magic);
687 blob = realloc(lum, lum_size + blob_size);
693 comp_v1 = (struct lov_comp_md_v1 *)lum;
694 blob = (struct lov_user_md *)((char *)lum + lum_size);
695 lum_size += blob_size;
698 blob->lmm_magic = magic;
699 if (pattern == LLAPI_LAYOUT_DEFAULT)
700 blob->lmm_pattern = LOV_PATTERN_RAID0;
701 else if (pattern == LLAPI_LAYOUT_MDT)
702 blob->lmm_pattern = LOV_PATTERN_MDT;
704 blob->lmm_pattern = pattern;
706 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
707 blob->lmm_stripe_size = 0;
709 blob->lmm_stripe_size = comp->llc_stripe_size;
711 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
712 blob->lmm_stripe_count = 0;
713 else if (comp->llc_stripe_count == LLAPI_LAYOUT_WIDE)
714 blob->lmm_stripe_count = LOV_ALL_STRIPES;
716 blob->lmm_stripe_count = comp->llc_stripe_count;
718 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
719 blob->lmm_stripe_offset = -1;
721 blob->lmm_stripe_offset = comp->llc_stripe_offset;
723 if (magic == LOV_USER_MAGIC_V3 ||
724 magic == LOV_USER_MAGIC_SPECIFIC) {
725 struct lov_user_md_v3 *lumv3 =
726 (struct lov_user_md_v3 *)blob;
728 if (comp->llc_pool_name[0] != '\0') {
729 strncpy(lumv3->lmm_pool_name,
731 sizeof(lumv3->lmm_pool_name));
733 memset(lumv3->lmm_pool_name, 0,
734 sizeof(lumv3->lmm_pool_name));
736 lmm_objects = lumv3->lmm_objects;
738 lmm_objects = blob->lmm_objects;
741 for (i = 0; i < obj_count; i++)
742 lmm_objects[i].l_ost_idx =
743 comp->llc_objects[i].l_ost_idx;
745 if (layout->llot_is_composite) {
746 ent = &comp_v1->lcm_entries[ent_idx];
747 ent->lcme_id = comp->llc_id;
748 ent->lcme_flags = comp->llc_flags;
749 if (ent->lcme_flags & LCME_FL_NOSYNC)
750 ent->lcme_timestamp = comp->llc_timestamp;
751 ent->lcme_extent.e_start = comp->llc_extent.e_start;
752 ent->lcme_extent.e_end = comp->llc_extent.e_end;
753 ent->lcme_size = blob_size;
754 ent->lcme_offset = offset;
756 comp_v1->lcm_size += blob_size;
770 * Get the parent directory of a path.
772 * \param[in] path path to get parent of
773 * \param[out] buf buffer in which to store parent path
774 * \param[in] size size in bytes of buffer \a buf
static void get_parent_dir(const char *path, char *buf, size_t size)
{
	char *p;

	/* Nothing can be stored in an empty buffer, and "size - 1" below
	 * would wrap around. */
	if (size == 0)
		return;

	/* strncpy() does not terminate the destination when \a path is
	 * longer than size - 1, so terminate explicitly before searching. */
	strncpy(buf, path, size - 1);
	buf[size - 1] = '\0';
	p = strrchr(buf, '/');

	if (p != NULL) {
		/* Cut the path at its last '/' to leave the parent. */
		*p = '\0';
	} else if (size >= 2) {
		/* No '/' at all: the parent is the current directory. */
		strncpy(buf, ".", 2);
		buf[size - 1] = '\0';
	}
}
792 * Substitute unspecified attribute values in \a layout with values
793 * from fs global settings. (lov.stripesize, lov.stripecount,
796 * \param[in] layout layout whose unspecified attributes are filled in
797 * \param[in] path file path of the filesystem
799 static void inherit_sys_attributes(struct llapi_layout *layout,
802 struct llapi_layout_comp *comp;
803 unsigned int ssize, scount, soffset;
806 rc = sattr_cache_get_defaults(NULL, path, &scount, &ssize, &soffset);
810 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
811 if (comp->llc_pattern == LLAPI_LAYOUT_DEFAULT)
812 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
813 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
814 comp->llc_stripe_size = ssize;
815 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
816 comp->llc_stripe_count = scount;
817 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
818 comp->llc_stripe_offset = soffset;
823 * Get the current component of \a layout.
825 * \param[in] layout layout to get current component
827 * \retval valid llapi_layout_comp pointer on success
828 * \retval NULL on error
830 static struct llapi_layout_comp *
831 __llapi_layout_cur_comp(const struct llapi_layout *layout)
833 struct llapi_layout_comp *comp;
835 if (layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
839 if (layout->llot_cur_comp == NULL) {
843 /* Verify data consistency */
844 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
845 if (comp == layout->llot_cur_comp)
852 * Test if any attributes of \a layout are specified.
854 * \param[in] layout the layout to check
856 * \retval true any attributes are specified
857 * \retval false all attributes are unspecified
859 static bool is_any_specified(const struct llapi_layout *layout)
861 struct llapi_layout_comp *comp;
863 comp = __llapi_layout_cur_comp(layout);
867 if (layout->llot_is_composite || layout->llot_mirror_count != 1)
870 return comp->llc_pattern != LLAPI_LAYOUT_DEFAULT ||
871 comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT ||
872 comp->llc_stripe_count != LLAPI_LAYOUT_DEFAULT ||
873 comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT ||
874 strlen(comp->llc_pool_name);
878 * Get the striping layout for the file referenced by file descriptor \a fd.
880 * If the filesystem does not support the "lustre." xattr namespace, the
881 * file must be on a non-Lustre filesystem, so set errno to ENOTTY per
882 * convention. If the file has no "lustre.lov" data, the file will
883 * inherit default values, so return a default layout.
885 * If the kernel gives us back less than the expected amount of data,
886 * we fail with errno set to EINTR.
888 * \param[in] fd open file descriptor
889 * \param[in] flags flags to control how the layout is retrieved
891 * \retval valid llapi_layout pointer on success
892 * \retval NULL if an error occurs
894 struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags)
897 struct lov_user_md *lum;
898 struct llapi_layout *layout = NULL;
902 lum_len = XATTR_SIZE_MAX;
903 lum = malloc(lum_len);
907 bytes_read = fgetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_len);
908 if (bytes_read < 0) {
909 if (errno == EOPNOTSUPP)
911 else if (errno == ENODATA)
912 layout = llapi_layout_alloc();
916 /* Directories may have a positive non-zero lum->lmm_stripe_count
917 * yet have an empty lum->lmm_objects array. For non-directories the
918 * amount of data returned from the kernel must be consistent
919 * with the stripe count. */
920 if (fstat(fd, &st) < 0)
923 layout = llapi_layout_get_by_xattr(lum, bytes_read,
924 S_ISDIR(st.st_mode) ? 0 : LLAPI_LXF_CHECK);
931 * Get the expected striping layout for a file at \a path.
933 * Substitute expected inherited attribute values for unspecified
934 * attributes. Unspecified attributes may belong to directories and
935 * never-written-to files, and indicate that default values will be
936 * assigned when files are created or first written to. A default value
937 * is inherited from the parent directory if the attribute is specified
938 * there, otherwise it is inherited from the filesystem root.
939 * Unspecified attributes normally have the value LLAPI_LAYOUT_DEFAULT.
941 * The complete \a path need not refer to an existing file or directory,
942 * but some leading portion of it must reside within a lustre filesystem.
943 * A use case for this interface would be to obtain the literal striping
944 * values that would be assigned to a new file in a given directory.
946 * \param[in] path path for which to get the expected layout
948 * \retval valid llapi_layout pointer on success
949 * \retval NULL if an error occurs
951 static struct llapi_layout *llapi_layout_expected(const char *path)
953 struct llapi_layout *path_layout = NULL;
954 char donor_path[PATH_MAX];
959 fd = open(path, O_RDONLY);
960 if (fd < 0 && errno != ENOENT)
966 path_layout = llapi_layout_get_by_fd(fd, 0);
972 if (path_layout == NULL) {
973 if (errno != ENODATA && errno != ENOENT)
976 path_layout = llapi_layout_alloc();
977 if (path_layout == NULL)
981 if (is_any_specified(path_layout)) {
982 inherit_sys_attributes(path_layout, path);
986 llapi_layout_free(path_layout);
988 rc = stat(path, &st);
989 if (rc < 0 && errno != ENOENT)
992 /* If path is a not a directory or doesn't exist, inherit layout
993 * from parent directory. */
994 if ((rc == 0 && !S_ISDIR(st.st_mode)) ||
995 (rc < 0 && errno == ENOENT)) {
996 get_parent_dir(path, donor_path, sizeof(donor_path));
997 path_layout = llapi_layout_get_by_path(donor_path, 0);
998 if (path_layout != NULL) {
999 if (is_any_specified(path_layout)) {
1000 inherit_sys_attributes(path_layout, donor_path);
1003 llapi_layout_free(path_layout);
1007 /* Inherit layout from the filesystem root. */
1008 rc = llapi_search_mounts(path, 0, donor_path, NULL);
1011 path_layout = llapi_layout_get_by_path(donor_path, 0);
1012 if (path_layout == NULL)
1015 inherit_sys_attributes(path_layout, donor_path);
1020 * Get the striping layout for the file at \a path.
1022 * If \a flags contains LAYOUT_GET_EXPECTED, substitute
1023 * expected inherited attribute values for unspecified attributes. See
1024 * llapi_layout_expected().
1026 * \param[in] path path for which to get the layout
1027 * \param[in] flags flags to control how layout is retrieved
1029 * \retval valid llapi_layout pointer on success
1030 * \retval NULL if an error occurs
1032 struct llapi_layout *llapi_layout_get_by_path(const char *path, uint32_t flags)
1034 struct llapi_layout *layout = NULL;
1038 if (flags & LAYOUT_GET_EXPECTED)
1039 return llapi_layout_expected(path);
1041 fd = open(path, O_RDONLY);
1045 layout = llapi_layout_get_by_fd(fd, flags);
1054 * Get the layout for the file with FID \a fid in filesystem \a lustre_dir.
1056 * \param[in] lustre_dir path within Lustre filesystem containing \a fid
1057 * \param[in] fid Lustre identifier of file to get layout for
1059 * \retval valid llapi_layout pointer on success
1060 * \retval NULL if an error occurs
1062 struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir,
1063 const struct lu_fid *fid,
1068 int saved_msg_level = llapi_msg_get_level();
1069 struct llapi_layout *layout = NULL;
1071 /* Prevent llapi internal routines from writing to console
1072 * while executing this function, then restore previous message
1074 llapi_msg_set_level(LLAPI_MSG_OFF);
1075 fd = llapi_open_by_fid(lustre_dir, fid, O_RDONLY);
1076 llapi_msg_set_level(saved_msg_level);
1081 layout = llapi_layout_get_by_fd(fd, flags);
1090 * Get the stripe count of \a layout.
1092 * \param[in] layout layout to get stripe count from
1093 * \param[out] count integer to store stripe count in
1095 * \retval 0 on success
1096 * \retval -1 if arguments are invalid
1098 int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
1101 struct llapi_layout_comp *comp;
1103 comp = __llapi_layout_cur_comp(layout);
1107 if (count == NULL) {
1112 *count = comp->llc_stripe_count;
1118 * The llapi_layout API functions have these extra validity checks since
1119 * they use intuitively named macros to denote special behavior, whereas
1120 * the old API uses 0 and -1.
1123 static bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
1125 return stripe_count == LLAPI_LAYOUT_DEFAULT ||
1126 stripe_count == LLAPI_LAYOUT_WIDE ||
1127 (stripe_count != 0 && stripe_count != -1 &&
1128 llapi_stripe_count_is_valid(stripe_count));
1131 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
1133 return stripe_size == LLAPI_LAYOUT_DEFAULT ||
1134 (stripe_size != 0 &&
1135 llapi_stripe_size_is_aligned(stripe_size) &&
1136 !llapi_stripe_size_is_too_big(stripe_size));
1139 static bool llapi_layout_stripe_index_is_valid(int64_t stripe_index)
1141 return stripe_index == LLAPI_LAYOUT_DEFAULT ||
1142 (stripe_index >= 0 &&
1143 llapi_stripe_index_is_valid(stripe_index));
1147 * Set the stripe count of \a layout.
1149 * \param[in] layout layout to set stripe count in
1150 * \param[in] count value to be set
1152 * \retval 0 on success
1153 * \retval -1 if arguments are invalid
1155 int llapi_layout_stripe_count_set(struct llapi_layout *layout,
1158 struct llapi_layout_comp *comp;
1160 comp = __llapi_layout_cur_comp(layout);
1164 if (!llapi_layout_stripe_count_is_valid(count)) {
1169 comp->llc_stripe_count = count;
1175 * Get the stripe size of \a layout.
1177 * \param[in] layout layout to get stripe size from
1178 * \param[out] size integer to store stripe size in
1180 * \retval 0 on success
1181 * \retval -1 if arguments are invalid
1183 int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
1186 struct llapi_layout_comp *comp;
1188 comp = __llapi_layout_cur_comp(layout);
1197 *size = comp->llc_stripe_size;
1203 * Set the stripe size of \a layout.
1205 * \param[in] layout layout to set stripe size in
1206 * \param[in] size value to be set
1208 * \retval 0 on success
1209 * \retval -1 if arguments are invalid
1211 int llapi_layout_stripe_size_set(struct llapi_layout *layout,
1214 struct llapi_layout_comp *comp;
1216 comp = __llapi_layout_cur_comp(layout);
1220 if (!llapi_layout_stripe_size_is_valid(size)) {
1225 comp->llc_stripe_size = size;
1231 * Get the RAID pattern of \a layout.
1233 * \param[in] layout layout to get pattern from
1234 * \param[out] pattern integer to store pattern in
1236 * \retval 0 on success
1237 * \retval -1 if arguments are invalid
1239 int llapi_layout_pattern_get(const struct llapi_layout *layout,
1242 struct llapi_layout_comp *comp;
1244 comp = __llapi_layout_cur_comp(layout);
1248 if (pattern == NULL) {
1253 *pattern = comp->llc_pattern;
1259 * Set the pattern of \a layout.
1261 * \param[in] layout layout to set pattern in
1262 * \param[in] pattern value to be set
1264 * \retval 0 on success
1265 * \retval -1 if arguments are invalid or RAID pattern
1268 int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
1270 struct llapi_layout_comp *comp;
1272 comp = __llapi_layout_cur_comp(layout);
1276 if (pattern != LLAPI_LAYOUT_DEFAULT &&
1277 pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT) {
1282 comp->llc_pattern = pattern |
1283 (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC);
1288 static inline int stripe_number_roundup(int stripe_number)
1290 unsigned int round_up = (stripe_number + 8) & ~7;
1291 return round_up > LOV_MAX_STRIPE_COUNT ?
1292 LOV_MAX_STRIPE_COUNT : round_up;
1296 * Set the OST index of stripe number \a stripe_number to \a ost_index.
1298 * If only the starting stripe's OST index is specified, then this can use
1299 * the normal LOV_MAGIC_{V1,V3} layout type. If multiple OST indices are
1300 * given, then allocate an array to hold the list of indices and ensure that
1301 * the LOV_USER_MAGIC_SPECIFIC layout is used when creating the file.
1303 * \param[in] layout layout to set OST index in
1304 * \param[in] stripe_number stripe number to set index for
1305 * \param[in] ost_index the index to set
1307 * \retval 0 on success
1308 * \retval -1 if arguments are invalid or an unsupported stripe number
1309 * was specified, error returned in errno
1311 int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
1314 struct llapi_layout_comp *comp;
1316 comp = __llapi_layout_cur_comp(layout);
1320 if (!llapi_layout_stripe_index_is_valid(ost_index)) {
1325 if (stripe_number == 0 && ost_index == LLAPI_LAYOUT_DEFAULT) {
1326 comp->llc_stripe_offset = ost_index;
1327 comp->llc_pattern &= ~LLAPI_LAYOUT_SPECIFIC;
1328 __llapi_comp_objects_realloc(comp, 0);
1329 } else if (stripe_number >= 0 &&
1330 stripe_number < LOV_MAX_STRIPE_COUNT) {
1331 if (ost_index >= LLAPI_LAYOUT_IDX_MAX) {
1336 /* Preallocate a few more stripes to avoid realloc() overhead.*/
1337 if (__llapi_comp_objects_realloc(comp,
1338 stripe_number_roundup(stripe_number)) < 0)
1341 comp->llc_objects[stripe_number].l_ost_idx = ost_index;
1343 if (stripe_number == 0)
1344 comp->llc_stripe_offset = ost_index;
1346 comp->llc_pattern |= LLAPI_LAYOUT_SPECIFIC;
1348 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT ||
1349 comp->llc_stripe_count <= stripe_number)
1350 comp->llc_stripe_count = stripe_number + 1;
1360 * Get the OST index associated with stripe \a stripe_number.
1362 * Stripes are indexed starting from zero.
1364 * \param[in] layout layout to get index from
1365 * \param[in] stripe_number stripe number to get index for
1366 * \param[out] index integer to store index in
1368 * \retval 0 on success
1369 * \retval -1 if arguments are invalid
/*
 * Read back the OST index recorded for one stripe of the current component.
 * NOTE(review): short lines (braces, returns, errno stores, else) are missing
 * from this excerpt.
 */
1371 int llapi_layout_ost_index_get(const struct llapi_layout *layout,
1372 uint64_t stripe_number, uint64_t *index)
1374 struct llapi_layout_comp *comp;
1376 comp = __llapi_layout_cur_comp(layout);
/* The output pointer is tested before anything is stored through it. */
1380 if (index == NULL) {
/* stripe_number must be below both the stripe count and the objects count. */
1385 if (stripe_number >= comp->llc_stripe_count ||
1386 stripe_number >= comp->llc_objects_count) {
/*
 * A default stripe offset is reported as LLAPI_LAYOUT_DEFAULT; the other
 * assignment reads the per-stripe entry (presumably the else branch, the
 * else line itself is not visible here).
 */
1391 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
1392 *index = LLAPI_LAYOUT_DEFAULT;
1394 *index = comp->llc_objects[stripe_number].l_ost_idx;
1401 * Get the pool name of layout \a layout.
1403 * \param[in] layout layout to get pool name from
1404 * \param[out] dest buffer to store pool name in
1405 * \param[in] n size in bytes of buffer \a dest
1407 * \retval 0 on success
1408 * \retval -1 if arguments are invalid
1410 int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
1413 struct llapi_layout_comp *comp;
1415 comp = __llapi_layout_cur_comp(layout);
1424 strncpy(dest, comp->llc_pool_name, n);
1430 * Set the name of the pool of layout \a layout.
1432 * \param[in] layout layout to set pool name in
1433 * \param[in] pool_name pool name to set
1435 * \retval 0 on success
1436 * \retval -1 if arguments are invalid or pool name is too long
/*
 * Store a pool name in the current component of the layout.
 * NOTE(review): short lines (braces, returns, errno stores, the declaration
 * of ptr) are missing from this excerpt.
 */
1438 int llapi_layout_pool_name_set(struct llapi_layout *layout,
1439 const char *pool_name)
1441 struct llapi_layout_comp *comp;
1444 comp = __llapi_layout_cur_comp(layout);
1448 if (pool_name == NULL) {
1453 /* Strip off any 'fsname.' portion. */
1454 ptr = strchr(pool_name, '.');
/* Continue with the text that follows the first dot. */
1456 pool_name = ptr + 1;
/*
 * Names longer than LOV_MAXPOOLNAME take the branch below. The destination
 * array is declared with LOV_MAXPOOLNAME + 1 bytes, so a name that is not
 * longer than LOV_MAXPOOLNAME is copied together with its terminator.
 */
1458 if (strlen(pool_name) > LOV_MAXPOOLNAME) {
1463 strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name));
1469 * Open and possibly create a file with a given \a layout.
1471 * If \a layout is NULL this function acts as a simple wrapper for
1472 * open(). By convention, ENOTTY is returned in errno if \a path
1473 * refers to a non-Lustre file.
1475 * \param[in] path name of the file to open
1476 * \param[in] open_flags open() flags
1477 * \param[in] mode permissions to create file, filtered by umask
1478 * \param[in] layout layout to create new file with
1480 * \retval non-negative file descriptor on successful open
1481 * \retval -1 if an error occurred
/*
 * Open a file and, when a layout is supplied, apply that layout through the
 * XATTR_LUSTRE_LOV extended attribute on the new descriptor.
 * NOTE(review): short lines (braces, returns, errno stores, cleanup calls and
 * some declarations) are missing from this excerpt.
 */
1483 int llapi_layout_file_open(const char *path, int open_flags, mode_t mode,
1484 const struct llapi_layout *layout)
1489 struct lov_user_md *lum;
/* A non-NULL layout must carry the LLAPI_LAYOUT_MAGIC marker. */
1493 (layout != NULL && layout->llot_magic != LLAPI_LAYOUT_MAGIC)) {
1498 /* Object creation must be postponed until after layout attributes
1499 * have been applied. */
1500 if (layout != NULL && (open_flags & O_CREAT))
1501 open_flags |= O_LOV_DELAY_CREATE;
1503 fd = open(path, open_flags, mode);
/* Without a layout, or when open() failed, no attribute is applied. */
1505 if (layout == NULL || fd < 0)
/* Convert the layout into the lov_user_md form passed to fsetxattr(). */
1508 lum = llapi_layout_to_lum(layout);
/*
 * The attribute size depends on the magic: composite layouts carry their own
 * lcm_size, SPECIFIC layouts are sized from their stripe count, and the last
 * form is sized for zero stripes.
 */
1517 if (lum->lmm_magic == LOV_USER_MAGIC_COMP_V1)
1518 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
1519 else if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC)
1520 lum_size = lov_user_md_size(lum->lmm_stripe_count,
1523 lum_size = lov_user_md_size(0, lum->lmm_magic);
1525 rc = fsetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_size, 0);
/* EOPNOTSUPP is translated to ENOTTY; any other errno is kept. */
1534 errno = errno == EOPNOTSUPP ? ENOTTY : errno;
/**
 * Create a new file that uses the given \a layout.
 *
 * O_CREAT and O_EXCL are always added to \a open_flags, so a successful
 * return means the file was created by this call with \a layout applied.
 *
 * \param[in] path		name of the file to open
 * \param[in] open_flags	open() flags
 * \param[in] mode		permissions to create new file with
 * \param[in] layout		layout to create new file with
 *
 * \retval		non-negative file descriptor on successful open
 * \retval		-1 if an error occurred
 */
int llapi_layout_file_create(const char *path, int open_flags, int mode,
			     const struct llapi_layout *layout)
{
	/* Exclusive creation is forced regardless of what the caller passed. */
	int create_flags = open_flags | O_CREAT | O_EXCL;

	return llapi_layout_file_open(path, create_flags, mode, layout);
}
/*
 * Copy the header flags of the layout into *flags.
 * NOTE(review): short lines (braces, returns, errno stores) are missing from
 * this excerpt; no NULL test of layout or flags is visible, confirm callers.
 */
1560 int llapi_layout_flags_get(struct llapi_layout *layout, uint32_t *flags)
/* Only objects carrying LLAPI_LAYOUT_MAGIC are accepted. */
1562 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1567 *flags = layout->llot_flags;
1572 * Set flags to the header of a component layout.
/*
 * Overwrite the header flags of the layout with the given value.
 * NOTE(review): short lines (braces, returns, errno stores) are missing from
 * this excerpt; no NULL test of layout is visible.
 */
1574 int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
/* Only objects carrying LLAPI_LAYOUT_MAGIC are accepted. */
1576 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
/* The whole flags word is replaced, not merged. */
1581 layout->llot_flags = flags;
/*
 * Map the FLR state bits of a layout flags word to a string.
 * NOTE(review): the returned strings and at least one case label are on short
 * lines that are missing from this excerpt.
 */
1585 const char *llapi_layout_flags_string(uint32_t flags)
/* Only the bits selected by LCM_FL_FLR_MASK take part in the switch. */
1587 switch (flags & LCM_FL_FLR_MASK) {
1590 case LCM_FL_WRITE_PENDING:
1592 case LCM_FL_SYNC_PENDING:
1599 const __u16 llapi_layout_string_flags(char *string)
1601 if (strncmp(string, "ro", strlen(string)) == 0)
1602 return LCM_FL_RDONLY;
1603 if (strncmp(string, "wp", strlen(string)) == 0)
1604 return LCM_FL_WRITE_PENDING;
1605 if (strncmp(string, "sp", strlen(string)) == 0)
1606 return LCM_FL_SYNC_PENDING;
1612 * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
1613 * @count: Mirror count value to be checked.
1615 * This function checks the validity of mirror count.
1617 * Return: true on success or false on failure.
1619 static bool llapi_layout_mirror_count_is_valid(uint16_t count)
1621 return count >= 0 && count <= LUSTRE_MIRROR_COUNT_MAX;
1625 * llapi_layout_mirror_count_get() - Get mirror count from the header of
1627 * @layout: Layout to get mirror count from.
1628 * @count: Returned mirror count value.
1630 * This function gets mirror count from the header of a layout.
1632 * Return: 0 on success or -1 on failure.
/*
 * Copy the mirror count stored in the layout header into *count.
 * NOTE(review): the second parameter line, braces, returns and errno stores
 * are missing from this excerpt; no NULL test of layout is visible.
 */
1634 int llapi_layout_mirror_count_get(struct llapi_layout *layout,
/* Only objects carrying LLAPI_LAYOUT_MAGIC are accepted. */
1637 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1642 *count = layout->llot_mirror_count;
1647 * llapi_layout_mirror_count_set() - Set mirror count to the header of a layout.
1648 * @layout: Layout to set mirror count in.
1649 * @count: Mirror count value to be set.
1651 * This function sets mirror count to the header of a layout.
1653 * Return: 0 on success or -1 on failure.
/*
 * Store a new mirror count in the layout header.
 * NOTE(review): the second parameter line, braces, returns and errno stores
 * are missing from this excerpt; no NULL test of layout is visible.
 */
1655 int llapi_layout_mirror_count_set(struct llapi_layout *layout,
/* Only objects carrying LLAPI_LAYOUT_MAGIC are accepted. */
1658 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
/* The count is checked by the file-local validity helper before the store. */
1663 if (!llapi_layout_mirror_count_is_valid(count)) {
1668 layout->llot_mirror_count = count;
1673 * Fetch the start and end offset of the current layout component.
1675 * \param[in] layout the layout component
1676 * \param[out] start extent start, inclusive
1677 * \param[out] end extent end, exclusive
1679 * \retval 0 on success
1680 * \retval <0 if error occurs
/*
 * Report the extent of the current component through *start and *end.
 * NOTE(review): short lines (braces, returns, errno stores) are missing from
 * this excerpt.
 */
1682 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
1683 uint64_t *start, uint64_t *end)
1685 struct llapi_layout_comp *comp;
1687 comp = __llapi_layout_cur_comp(layout);
/* Both output pointers are required. */
1691 if (start == NULL || end == NULL) {
1696 *start = comp->llc_extent.e_start;
1697 *end = comp->llc_extent.e_end;
1703 * Set the layout extent of a layout.
1705 * \param[in] layout the layout to be set
1706 * \param[in] start extent start, inclusive
1707 * \param[in] end extent end, exclusive
1709 * \retval 0 on success
1710 * \retval <0 if error occurs
/*
 * Set the extent of the current component and mark the layout composite.
 * NOTE(review): short lines (braces, returns, errno stores and at least one
 * check between the lookup and the adjacency tests) are missing from this
 * excerpt.
 */
1712 int llapi_layout_comp_extent_set(struct llapi_layout *layout,
1713 uint64_t start, uint64_t end)
1715 struct llapi_layout_comp *prev, *next, *comp;
1717 comp = __llapi_layout_cur_comp(layout);
1727 * We need to make sure the extent to be set is valid: the new
1728 * extent must be adjacent with the prev & next component.
/* A predecessor exists when the prev link is not the list head. */
1730 if (comp->llc_list.prev != &layout->llot_comp_list) {
1731 prev = list_entry(comp->llc_list.prev, typeof(*prev),
/* A start of 0 is exempt from the match against the end of prev. */
1733 if (start != 0 && start != prev->llc_extent.e_end) {
/* A successor exists when the next link is not the list head. */
1739 if (comp->llc_list.next != &layout->llot_comp_list) {
1740 next = list_entry(comp->llc_list.next, typeof(*next),
/* A successor starting at 0 is exempt from the match against end. */
1742 if (next->llc_extent.e_start != 0 &&
1743 end != next->llc_extent.e_start) {
1749 comp->llc_extent.e_start = start;
1750 comp->llc_extent.e_end = end;
/* Setting an extent turns the layout into a composite one. */
1751 layout->llot_is_composite = true;
1757 * Gets the attribute flags of the current component.
1759 * \param[in] layout the layout component
1760 * \param[out] flags stored the returned component flags
1762 * \retval 0 on success
1763 * \retval <0 if error occurs
/*
 * Copy the flags of the current component into *flags.
 * NOTE(review): the second parameter line, braces, returns and errno stores
 * are missing from this excerpt.
 */
1765 int llapi_layout_comp_flags_get(const struct llapi_layout *layout,
1768 struct llapi_layout_comp *comp;
1770 comp = __llapi_layout_cur_comp(layout);
/* The output pointer is required. */
1774 if (flags == NULL) {
1779 *flags = comp->llc_flags;
1785 * Sets the specified flags of the current component leaving other flags as-is.
1787 * \param[in] layout the layout component
1788 * \param[in] flags component flags to be set
1790 * \retval 0 on success
1791 * \retval <0 if error occurs
/*
 * Turn on the given flag bits in the current component.
 * NOTE(review): short lines (braces, returns) are missing from this excerpt.
 */
1793 int llapi_layout_comp_flags_set(struct llapi_layout *layout, uint32_t flags)
1795 struct llapi_layout_comp *comp;
1797 comp = __llapi_layout_cur_comp(layout);
/* OR-ing keeps every bit that was already set. */
1801 comp->llc_flags |= flags;
1807 * Clears the flags specified in the flags leaving other flags as-is.
1809 * \param[in] layout the layout component
1810 * \param[in] flags component flags to be cleared
1812 * \retval 0 on success
1813 * \retval <0 if error occurs
/*
 * Turn off the given flag bits in the current component.
 * NOTE(review): the second parameter line, braces and returns are missing
 * from this excerpt.
 */
1815 int llapi_layout_comp_flags_clear(struct llapi_layout *layout,
1818 struct llapi_layout_comp *comp;
1820 comp = __llapi_layout_cur_comp(layout);
/* AND with the complement clears only the requested bits. */
1824 comp->llc_flags &= ~flags;
1830 * Fetches the file-unique component ID of the current layout component.
1832 * \param[in] layout the layout component
1833 * \param[out] id stored the returned component ID
1835 * \retval 0 on success
1836 * \retval <0 if error occurs
/*
 * Fetch the ID of the current component.
 * NOTE(review): everything after the component lookup (argument checks, the
 * store through id and the returns) is on lines missing from this excerpt.
 */
1838 int llapi_layout_comp_id_get(const struct llapi_layout *layout, uint32_t *id)
1840 struct llapi_layout_comp *comp;
1842 comp = __llapi_layout_cur_comp(layout);
1856 * Return the mirror id of the current layout component.
1858 * \param[in] layout the layout component
1859 * \param[out] id stored the returned mirror ID
1861 * \retval 0 on success
1862 * \retval <0 if error occurs
/*
 * Fetch the mirror ID that the current component belongs to.
 * NOTE(review): short lines (argument checks, braces, returns) are missing
 * from this excerpt.
 */
1864 int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id)
1866 struct llapi_layout_comp *comp;
1868 comp = __llapi_layout_cur_comp(layout);
/* The mirror ID is derived from the component ID by mirror_id_of(). */
1877 *id = mirror_id_of(comp->llc_id);
1883 * Adds a component to \a layout, the new component will be added to
1884 * the tail of components list and it'll inherit attributes of existing
1885 * ones. The \a layout will change its current component pointer to
1886 * the newly added component, and it'll be turned into a composite
1887 * layout if it was not before the adding.
1889 * \param[in] layout existing composite or plain layout
1891 * \retval 0 on success
1892 * \retval <0 if error occurs
/*
 * Append a new component after the last one and make it the current one.
 * NOTE(review): short lines (braces, returns, errno stores, NULL checks) are
 * missing from this excerpt.
 */
1894 int llapi_layout_comp_add(struct llapi_layout *layout)
1896 struct llapi_layout_comp *last, *comp, *new;
1898 comp = __llapi_layout_cur_comp(layout);
1902 new = __llapi_comp_alloc(0);
/* The component currently at the tail of the list. */
1906 last = list_entry(layout->llot_comp_list.prev, typeof(*last),
/*
 * The new component is freed when its end does not lie beyond the end of
 * the last component.
 */
1909 if (new->llc_extent.e_end <= last->llc_extent.e_end) {
1910 __llapi_comp_free(new);
/* The new extent starts where the previous tail ends. */
1914 new->llc_extent.e_start = last->llc_extent.e_end;
1916 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1917 layout->llot_cur_comp = new;
1918 layout->llot_is_composite = true;
1923 * Adds a first component of a mirror to \a layout.
1924 * The \a layout will change its current component pointer to
1925 * the newly added component, and it'll be turned into a composite
1926 * layout if it was not before the adding.
1928 * \param[in] layout existing composite or plain layout
1930 * \retval 0 on success
1931 * \retval <0 if error occurs
/*
 * Append a component whose extent starts at offset 0 and make it current.
 * NOTE(review): short lines (braces, returns, NULL checks) are missing from
 * this excerpt.
 */
1933 int llapi_layout_add_first_comp(struct llapi_layout *layout)
1935 struct llapi_layout_comp *comp, *new;
1937 comp = __llapi_layout_cur_comp(layout);
1941 new = __llapi_comp_alloc(0);
/* Unlike llapi_layout_comp_add(), the start is fixed at 0. */
1945 new->llc_extent.e_start = 0;
1947 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1948 layout->llot_cur_comp = new;
1949 layout->llot_is_composite = true;
1955 * Deletes current component from the composite layout. The component
1956 * to be deleted must be the tail of components list, and it can't be
1957 * the only component in the layout.
1959 * \param[in] layout composite layout
1961 * \retval 0 on success
1962 * \retval <0 if error occurs
/*
 * Remove the current component from a composite layout and free it.
 * NOTE(review): short lines (braces, returns, errno stores) are missing from
 * this excerpt.
 */
1964 int llapi_layout_comp_del(struct llapi_layout *layout)
1966 struct llapi_layout_comp *comp;
1968 comp = __llapi_layout_cur_comp(layout);
/* Plain layouts are handled by the branch below. */
1972 if (!layout->llot_is_composite) {
1977 /* It must be the tail of the list (for PFL, can be relaxed
1978 * once we get mirrored components) */
1979 if (comp->llc_list.next != &layout->llot_comp_list) {
1983 /* It can't be the only one on the list */
1984 if (comp->llc_list.prev == &layout->llot_comp_list) {
/* The predecessor becomes current before the component is unlinked. */
1989 layout->llot_cur_comp =
1990 list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
1991 list_del_init(&comp->llc_list);
1992 __llapi_comp_free(comp);
1998 * Move the current component pointer to the component with
1999 * specified component ID.
2001 * \param[in] layout composite layout
2002 * \param[in] id component ID
2004 * \retval =0 : moved successfully
2005 * \retval <0 if error occurs
/*
 * Make the component whose ID equals comp_id the current component.
 * NOTE(review): short lines (braces, returns, errno stores, the result for a
 * missing ID) are missing from this excerpt.
 */
2007 int llapi_layout_comp_use_id(struct llapi_layout *layout, uint32_t comp_id)
2009 struct llapi_layout_comp *comp;
2011 comp = __llapi_layout_cur_comp(layout);
2013 return -1; /* use previously set errno */
/* Plain layouts are handled by the branch below. */
2015 if (!layout->llot_is_composite) {
/* LCME_ID_INVAL is handled separately, before the search. */
2020 if (comp_id == LCME_ID_INVAL) {
/* Walk the component list; a match becomes the current component. */
2025 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
2026 if (comp->llc_id == comp_id) {
2027 layout->llot_cur_comp = comp;
2036 * Move the current component pointer to a specified position.
2038 * \param[in] layout composite layout
2039 * \param[in] pos the position to be moved, it can be:
2040 * LLAPI_LAYOUT_COMP_USE_FIRST: use first component
2041 * LLAPI_LAYOUT_COMP_USE_LAST: use last component
2042 * LLAPI_LAYOUT_COMP_USE_NEXT: use component after current
2043 * LLAPI_LAYOUT_COMP_USE_PREV: use component before current
2045 * \retval =0 : moved successfully
2046 * \retval =1 : at last component with NEXT, at first component with PREV
2047 * \retval <0 if error occurs
/*
 * Move the current component pointer to the first, last, next or previous
 * component of the layout.
 * NOTE(review): short lines (the switch header, break statements, braces,
 * returns, errno stores and the end-of-list tests) are missing from this
 * excerpt.
 */
2049 int llapi_layout_comp_use(struct llapi_layout *layout,
2050 enum llapi_layout_comp_use pos)
2052 struct llapi_layout_comp *comp, *head, *tail;
2054 comp = __llapi_layout_cur_comp(layout);
/* For a plain layout, FIRST and LAST are singled out from NEXT and PREV. */
2058 if (!layout->llot_is_composite) {
2059 if (pos == LLAPI_LAYOUT_COMP_USE_FIRST ||
2060 pos == LLAPI_LAYOUT_COMP_USE_LAST)
/* head and tail are the first and last entries of the component list. */
2066 head = list_entry(layout->llot_comp_list.next, typeof(*head), llc_list);
2067 tail = list_entry(layout->llot_comp_list.prev, typeof(*tail), llc_list);
2069 case LLAPI_LAYOUT_COMP_USE_FIRST:
2070 layout->llot_cur_comp = head;
2072 case LLAPI_LAYOUT_COMP_USE_NEXT:
/* Step to the entry that follows the current component. */
2077 layout->llot_cur_comp = list_entry(comp->llc_list.next,
2078 typeof(*comp), llc_list);
2080 case LLAPI_LAYOUT_COMP_USE_LAST:
2081 layout->llot_cur_comp = tail;
2083 case LLAPI_LAYOUT_COMP_USE_PREV:
/* Step to the entry that precedes the current component. */
2088 layout->llot_cur_comp = list_entry(comp->llc_list.prev,
2089 typeof(*comp), llc_list);
2100 * Add layout component(s) to an existing file.
2102 * \param[in] path The path name of the file
2103 * \param[in] layout The layout component(s) to be added
/*
 * Add the components of a composite layout to an existing file by writing
 * the add variant of the XATTR_LUSTRE_LOV attribute.
 * NOTE(review): short lines (braces, returns, errno stores, cleanup calls)
 * are missing from this excerpt.
 */
2105 int llapi_layout_file_comp_add(const char *path,
2106 const struct llapi_layout *layout)
2108 int rc, fd, lum_size, tmp_errno = 0;
2109 struct lov_user_md *lum;
/* Both arguments are required and the layout must carry the magic value. */
2111 if (path == NULL || layout == NULL ||
2112 layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
2117 lum = llapi_layout_to_lum(layout);
/* Only the composite form (LOV_USER_MAGIC_COMP_V1) continues past here. */
2121 if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) {
2126 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2128 fd = open(path, O_RDWR);
2135 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0);
2150 * Delete component(s) by the specified component id or component flags
2151 * from an existing file.
2153 * \param[in] path path name of the file
2154 * \param[in] id unique component ID
2155 * \param[in] flags flags: LCME_FL_* or;
2156 * negative flags: (LCME_FL_NEG|LCME_FL_*)
/*
 * Delete components of an existing file, selected by component ID or by
 * flags, by writing the del variant of the XATTR_LUSTRE_LOV attribute.
 * NOTE(review): short lines (braces, returns, errno stores, NULL checks, the
 * store of the ID and cleanup calls) are missing from this excerpt.
 */
2158 int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags)
2160 int rc, fd, lum_size;
2161 struct llapi_layout *layout;
2162 struct llapi_layout_comp *comp;
2163 struct lov_user_md *lum;
/* The ID must not exceed LCME_ID_MAX and flags must all be known flags. */
2165 if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) {
2170 /* Can only specify ID or flags, not both. */
2171 if (id != 0 && flags != 0) {
/* A temporary layout describes what is to be deleted. */
2176 layout = llapi_layout_alloc();
/* Setting an extent marks the temporary layout as composite. */
2180 llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF);
2181 comp = __llapi_layout_cur_comp(layout);
2183 llapi_layout_free(layout);
2188 comp->llc_flags = flags;
2190 lum = llapi_layout_to_lum(layout);
2192 llapi_layout_free(layout);
/* The size is read from the composite header of the converted layout. */
2195 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2197 fd = open(path, O_RDWR);
2203 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0);
/* errno is saved into a local right after the fsetxattr() call. */
2205 int tmp_errno = errno;
2214 llapi_layout_free(layout);
2219 * Change flags or other parameters of the component(s) by component ID of an
2220 * existing file. The component to be modified is specified by the
2221 * comp->lcme_id value, which must be an unique component ID. The new
2222 * attributes are passed in by @comp and @valid is used to specify which
2223 * attributes in the component are going to be changed.
2225 * \param[in] path path name of the file
2226 * \param[in] ids An array of component IDs
2227 * \param[in] flags flags: LCME_FL_* or;
2228 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2229 * \param[in] count Number of elements in ids and flags array
/*
 * Change the flags of components of an existing file, selected by ID, by
 * writing the set.flags variant of the XATTR_LUSTRE_LOV attribute.
 * NOTE(review): short lines (the count parameter, braces, returns, errno
 * stores, goto statements, NULL checks and cleanup calls) are missing from
 * this excerpt.
 */
2231 int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
2234 int rc = -1, fd = -1, i;
2236 struct llapi_layout *layout;
2237 struct llapi_layout_comp *comp;
2238 struct lov_user_md *lum = NULL;
/* First pass: check every (id, flags) pair before anything is allocated. */
2248 for (i = 0; i < count; i++) {
/* A zero ID and a zero flags word are both singled out here. */
2249 if (!ids[i] || !flags[i]) {
2254 if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
2259 /* do not allow to set or clear INIT flag */
2260 if (flags[i] & LCME_FL_INIT) {
/* Second pass: build a composite layout holding one component per pair. */
2266 layout = __llapi_layout_alloc();
2270 layout->llot_is_composite = true;
2271 for (i = 0; i < count; i++) {
2272 comp = __llapi_comp_alloc(0);
2276 comp->llc_id = ids[i];
2277 comp->llc_flags = flags[i];
2279 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
2280 layout->llot_cur_comp = comp;
2283 lum = llapi_layout_to_lum(layout);
2287 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2289 fd = open(path, O_RDWR);
2293 /* flush cached pages from clients */
2294 rc = llapi_file_flush(fd);
2301 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
/* errno is saved into a local at this point. */
2309 int tmp_errno = errno;
2316 llapi_layout_free(layout);
2321 * Check if the file layout is composite.
2323 * \param[in] layout the file layout to check
2325 * \retval true composite
2326 * \retval false not composite
2328 bool llapi_layout_is_composite(struct llapi_layout *layout)
2330 return layout->llot_is_composite;
2334 * Iterate over every component in the @layout and call the callback function @cb.
2336 * \param[in] layout component layout list.
2337 * \param[in] cb callback for each component
2338 * \param[in] cbdata callback data
2340 * \retval < 0 error happens during the iteration
2341 * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error
2342 * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration
/*
 * Call cb once per component, starting from the first component.
 * NOTE(review): short lines (the loop header, braces, returns, the rc
 * declaration) are missing from this excerpt.
 */
2344 int llapi_layout_comp_iterate(struct llapi_layout *layout,
2345 llapi_layout_iter_cb cb, void *cbdata)
/* Rewind to the first component. */
2349 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2354 * make sure on success llapi_layout_comp_use() API returns 0 with
/* Any callback result other than LLAPI_LAYOUT_ITER_CONT is singled out. */
2360 rc = cb(layout, cbdata);
2361 if (rc != LLAPI_LAYOUT_ITER_CONT)
/* Advance; a result of 1 means there was no further component. */
2364 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2367 else if (rc == 1) /* reached the last comp */
2368 return LLAPI_LAYOUT_ITER_CONT;
2375 * llapi_layout_merge() - Merge a composite layout into another one.
2376 * @dst_layout: Destination composite layout.
2377 * @src_layout: Source composite layout.
2379 * This function copies all of the components from @src_layout and
2380 * appends them to @dst_layout.
2382 * Return: 0 on success or -1 on failure.
/*
 * Append copies of all components of src_layout to *dst_layout, allocating
 * the destination layout when *dst_layout is NULL.
 * NOTE(review): short lines (braces, returns, errno stores, goto statements,
 * labels and some declarations) are missing from this excerpt.
 */
2384 int llapi_layout_merge(struct llapi_layout **dst_layout,
2385 const struct llapi_layout *src_layout)
/* dst_layout is dereferenced here; no NULL test of it is visible. */
2387 struct llapi_layout *new_layout = *dst_layout;
2388 struct llapi_layout_comp *new = NULL;
2389 struct llapi_layout_comp *comp = NULL;
/* A NULL source and a source without components are handled together. */
2392 if (src_layout == NULL ||
2393 list_empty((struct list_head *)&src_layout->llot_comp_list))
2396 if (new_layout == NULL) {
2397 new_layout = __llapi_layout_alloc();
2398 if (new_layout == NULL) {
/* Copy each source component field by field into a fresh component. */
2404 list_for_each_entry(comp, &src_layout->llot_comp_list, llc_list) {
2405 new = __llapi_comp_alloc(0);
2411 new->llc_pattern = comp->llc_pattern;
2412 new->llc_stripe_size = comp->llc_stripe_size;
2413 new->llc_stripe_count = comp->llc_stripe_count;
2414 new->llc_stripe_offset = comp->llc_stripe_offset;
/* The pool name is copied only when the source has one. */
2416 if (comp->llc_pool_name[0] != '\0')
2417 strncpy(new->llc_pool_name, comp->llc_pool_name,
2418 sizeof(new->llc_pool_name));
/* The objects array is resized before each entry is written. */
2420 for (i = 0; i < comp->llc_objects_count; i++) {
2421 if (__llapi_comp_objects_realloc(new,
2422 stripe_number_roundup(i)) < 0) {
2424 __llapi_comp_free(new);
2427 new->llc_objects[i].l_ost_idx = \
2428 comp->llc_objects[i].l_ost_idx;
2431 new->llc_objects_count = comp->llc_objects_count;
2432 new->llc_extent.e_start = comp->llc_extent.e_start;
2433 new->llc_extent.e_end = comp->llc_extent.e_end;
2434 new->llc_id = comp->llc_id;
2435 new->llc_flags = comp->llc_flags;
2437 list_add_tail(&new->llc_list, &new_layout->llot_comp_list);
2438 new_layout->llot_cur_comp = new;
2440 new_layout->llot_is_composite = true;
2442 *dst_layout = new_layout;
/*
 * NOTE(review): if the free below sits on an error path, a layout that the
 * caller passed in through *dst_layout would be freed as well while the
 * caller still holds the pointer; confirm against the full source.
 */
2445 llapi_layout_free(new_layout);
2450 * Find all stale components.
2452 * \param[in] layout component layout list.
2453 * \param[out] comp array of stale component info.
2454 * \param[in] comp_size array size of @comp.
2455 * \param[in] mirror_ids array of mirror id that only components
2456 * belonging to these mirror will be collected.
2457 * \param[in] ids_nr number of mirror ids array.
2459 * \retval number of component info collected on success or
2460 * an error code on failure.
/*
 * Walk all components of the layout and record those flagged LCME_FL_STALE
 * in the comp array, optionally restricted to the given mirror IDs.
 * NOTE(review): short lines (the loop header, braces, returns, goto
 * statements, the idx increment and some declarations) are missing from this
 * excerpt.
 */
2462 int llapi_mirror_find_stale(struct llapi_layout *layout,
2463 struct llapi_resync_comp *comp, size_t comp_size,
2464 __u16 *mirror_ids, int ids_nr)
/* Rewind to the first component. */
2469 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2477 uint64_t start, end;
2479 rc = llapi_layout_comp_flags_get(layout, &flags);
/* Only components with the STALE flag are of interest. */
2483 if (!(flags & LCME_FL_STALE))
2486 rc = llapi_layout_mirror_id_get(layout, &mirror_id);
2490 /* the caller only wants stale components from specific
2495 for (j = 0; j < ids_nr; j++) {
2496 if (mirror_ids[j] == mirror_id)
2500 /* not in the specified mirror */
2503 } else if (flags & LCME_FL_NOSYNC) {
2504 /* if not specified mirrors, do not resync "nosync"
2509 rc = llapi_layout_comp_id_get(layout, &id);
2513 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2517 /* pack this component into @comp array */
/*
 * NOTE(review): comp[idx] is written before the visible idx >= comp_size
 * test, so a comp_size of 0 would write past the array; confirm that
 * callers never pass 0.
 */
2518 comp[idx].lrc_id = id;
2519 comp[idx].lrc_mirror_id = mirror_id;
2520 comp[idx].lrc_start = start;
2521 comp[idx].lrc_end = end;
2524 if (idx >= comp_size) {
2530 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
/* A negative rc is returned as is, otherwise the number of entries. */
2537 return rc < 0 ? rc : idx;
2540 /* locate @layout to a valid component covering file [file_start, file_end) */
/*
 * Search the layout for non-stale components covering the file range that
 * starts at file_start, reporting the covered end through *endp.
 * NOTE(review): short lines (the endp parameter, the loop header, braces,
 * returns, break and continue statements, the mirror_id store) are missing
 * from this excerpt.
 */
2541 uint32_t llapi_mirror_find(struct llapi_layout *layout,
2542 uint64_t file_start, uint64_t file_end,
2545 uint32_t mirror_id = 0;
/* Rewind to the first component. */
2548 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2554 uint64_t start, end;
2555 uint32_t flags, id, rid;
2557 rc = llapi_layout_comp_flags_get(layout, &flags);
/* Stale components are singled out here. */
2561 if (flags & LCME_FL_STALE)
2564 rc = llapi_layout_mirror_id_get(layout, &rid);
2568 rc = llapi_layout_comp_id_get(layout, &id);
2572 rc = llapi_layout_comp_extent_get(layout, &start, &end);
/* The component must contain file_start. */
2576 if (file_start >= start && file_start < end) {
/* A later component must be in the same mirror and start at *endp. */
2579 else if (mirror_id != rid || *endp != start)
/* Extend the covered range to the end of this component. */
2582 file_start = *endp = end;
2583 if (end >= file_end)
2588 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
/*
 * Copy the byte range [start, end) from a valid mirror into every component
 * listed in comp_array, recording per-component success in lrc_synced.
 * NOTE(review): short lines (loop headers, braces, returns, break and
 * continue statements, several declarations and assignments) are missing
 * from this excerpt; the comments below describe only visible statements.
 */
2596 int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
2597 struct llapi_resync_comp *comp_array,
2598 int comp_size, uint64_t start, uint64_t end)
2601 size_t page_size = sysconf(_SC_PAGESIZE);
2602 const size_t buflen = 4 << 20; /* 4M */
2604 uint64_t pos = start;
/* The transfer buffer is aligned to the page size. */
2609 rc = posix_memalign(&buf, page_size, buflen);
/* An end of OBD_OBJECT_EOF means the byte count is open-ended. */
2613 if (end == OBD_OBJECT_EOF)
2614 count = OBD_OBJECT_EOF;
2616 count = end - start;
2620 uint64_t mirror_end = 0;
2621 uint64_t bytes_left;
/* Pick a source mirror that covers pos; mirror_end receives its end. */
2626 src = llapi_mirror_find(layout, pos, end, &mirror_end);
2630 if (mirror_end == OBD_OBJECT_EOF) {
/* Limit the chunk to the mirror extent, then round up to a page multiple. */
2633 bytes_left = MIN(count, mirror_end - pos);
2634 bytes_left = ((bytes_left - 1) | (page_size - 1)) + 1;
2636 to_read = MIN(buflen, bytes_left);
2638 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
2639 if (bytes_read == 0) {
2643 if (bytes_read < 0) {
2648 /* round up to page align to make direct IO happy. */
2649 to_write = ((bytes_read - 1) | (page_size - 1)) + 1;
/* Offer the chunk that was read to every component in comp_array. */
2651 for (i = 0; i < comp_size; i++) {
2654 size_t to_write2 = to_write;
2656 /* skip non-overlapped component */
2657 if (pos >= comp_array[i].lrc_end ||
2658 pos + to_write <= comp_array[i].lrc_start)
/* Clip the write at the start of the component. */
2661 if (pos < comp_array[i].lrc_start)
2662 pos2 = comp_array[i].lrc_start;
2664 to_write2 -= pos2 - pos;
/* Clip the write at the end of the component. */
2666 if ((pos + to_write) > comp_array[i].lrc_end)
2667 to_write2 -= pos + to_write -
2668 comp_array[i].lrc_end;
2670 written = llapi_mirror_write(fd,
2671 comp_array[i].lrc_mirror_id,
2676 * this component is not written successfully,
2677 * mark it using its lrc_synced, it is supposed
2678 * to be false before getting here.
2680 * And before this function returns, all
2681 * elements of comp_array will reverse their
2682 * lrc_synced flag to reflect their true
2685 comp_array[i].lrc_synced = true;
2686 llapi_error(LLAPI_MSG_ERROR, written,
2687 "component %u not synced\n",
2688 comp_array[i].lrc_id);
2693 assert(written == to_write2);
2697 count -= bytes_read;
2703 /* fatal error happens */
2704 for (i = 0; i < comp_size; i++)
2705 comp_array[i].lrc_synced = false;
2710 * no fatal error happens, each lrc_synced tells whether the component
2711 * has been resync successfully (note: we'd reverse the value to
2712 * reflect its true meaning.
/*
 * Invert the failure marks into success marks. A component that synced is
 * then truncated to pos when pos is not a multiple of the page size.
 */
2714 for (i = 0; i < comp_size; i++) {
2715 comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
2716 if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
2717 rc = llapi_mirror_truncate(fd,
2718 comp_array[i].lrc_mirror_id, pos);
2720 comp_array[i].lrc_synced = false;
2725 * returns the first error code for partially successful resync if
/*
 * Return the size in bytes of the layout that lcm points at.
 * NOTE(review): short lines (braces and the return taken for an unsupported
 * magic) are missing from this excerpt.
 */
2731 int lov_comp_md_size(struct lov_comp_md_v1 *lcm)
/* Plain V1 and V3 layouts are sized from their stripe count and magic. */
2733 if (lcm->lcm_magic == LOV_MAGIC_V1 || lcm->lcm_magic == LOV_MAGIC_V3) {
2734 struct lov_user_md *lum = (void *)lcm;
2736 return lov_user_md_size(lum->lmm_stripe_count, lum->lmm_magic);
/* Any magic other than LOV_MAGIC_COMP_V1 takes the branch below. */
2739 if (lcm->lcm_magic != LOV_MAGIC_COMP_V1)
/* Composite layouts record their own total size. */
2742 return lcm->lcm_size;
/*
 * Fetch statx data and the layout of a file, named relative to dir_fd, and
 * copy the requested parts to the caller.
 * NOTE(review): short lines (the lumsize parameter, braces, returns, NULL
 * tests of valid, statx and lum, handling of parent_fd) are missing from
 * this excerpt.
 */
2745 int llapi_get_lum_file_fd(int dir_fd, const char *fname, __u64 *valid,
2746 lstatx_t *statx, struct lov_user_md *lum,
2749 struct lov_user_mds_data *lmd;
/* Stack buffer: the lmd header up to lmd_lmm plus 65536 bytes. */
2750 char buf[65536 + offsetof(typeof(*lmd), lmd_lmm)];
/* A caller buffer smaller than one lov_user_md takes the branch below. */
2754 if (lum && lumsize < sizeof(*lum))
2757 /* If a file name is provided, it is relative to the parent directory */
2763 lmd = (struct lov_user_mds_data *)buf;
2764 rc = get_lmd_info_fd(fname, parent_fd, dir_fd, buf, sizeof(buf),
2769 *valid = lmd->lmd_flags;
2771 memcpy(statx, &lmd->lmd_stx, sizeof(*statx));
/* The layout is copied only after its size is compared with lumsize. */
2774 if (lmd->lmd_lmmsize > lumsize)
2776 memcpy(lum, &lmd->lmd_lmm, lmd->lmd_lmmsize);
2782 int llapi_get_lum_dir_fd(int dir_fd, __u64 *valid, lstatx_t *statx,
2783 struct lov_user_md *lum, size_t lumsize)
2785 return llapi_get_lum_file_fd(dir_fd, NULL, valid, statx, lum, lumsize);
/*
 * Path-based front end of llapi_get_lum_file_fd(): split path into a parent
 * directory and a file name, open the parent and query the file through it.
 * NOTE(review): short lines (braces, returns, else, declarations of tmp,
 * offset, fname, dir_fd and rc, the close of dir_fd) are missing from this
 * excerpt.
 */
2788 int llapi_get_lum_file(const char *path, __u64 *valid, lstatx_t *statx,
2789 struct lov_user_md *lum, size_t lumsize)
2791 char parent[PATH_MAX];
/* Find the last slash to separate the directory from the file name. */
2798 tmp = strrchr(path, '/');
/* This copy stores a single dot as the parent directory. */
2800 strncpy(parent, ".", sizeof(parent) - 1);
/*
 * NOTE(review): this copy writes tmp - path bytes into parent, which holds
 * PATH_MAX bytes, and no bound check is visible; confirm that a hidden line
 * rejects longer paths.
 */
2803 strncpy(parent, path, tmp - path);
2804 offset = tmp - path - 1;
/* Terminate the directory part explicitly. */
2805 parent[tmp - path] = 0;
2810 fname += offset + 2;
2812 dir_fd = open(parent, O_RDONLY);
2815 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
2819 rc = llapi_get_lum_file_fd(dir_fd, fname, valid, statx, lum, lumsize);
2824 int llapi_get_lum_dir(const char *path, __u64 *valid, lstatx_t *statx,
2825 struct lov_user_md *lum, size_t lumsize)
2830 dir_fd = open(path, O_RDONLY);
2833 llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
2837 rc = llapi_get_lum_dir_fd(dir_fd, valid, statx, lum, lumsize);