4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the GNU Lesser General Public License
8 * (LGPL) version 2.1 or (at your discretion) any later version.
9 * (LGPL) version 2.1 accompanies this distribution, and is available at
10 * http://www.gnu.org/licenses/lgpl-2.1.html
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
20 * lustre/utils/liblustreapi_layout.c
22 * lustreapi library for layout calls for interacting with the layout of
23 * Lustre files while hiding details of the internal data structures
26 * Copyright (c) 2016, 2017, Intel Corporation.
28 * Author: Ned Bass <bass6@llnl.gov>
38 #include <sys/xattr.h>
39 #include <sys/param.h>
41 #include <libcfs/util/list.h>
42 #include <lustre/lustreapi.h>
43 #include "lustreapi_internal.h"
46 * Layout component, which contains all attributes of a plain
/* Attributes of a single layout component.  Components are chained
 * through llc_list onto the llot_comp_list of their layout. */
49 struct llapi_layout_comp {
/* Striping attributes; __llapi_comp_alloc() initialises each of them to
 * LLAPI_LAYOUT_DEFAULT. */
51 uint64_t llc_stripe_size;
52 uint64_t llc_stripe_count;
53 uint64_t llc_stripe_offset;
54 /* Add 1 so user always gets back a null terminated string. */
55 char llc_pool_name[LOV_MAXPOOLNAME + 1];
56 /** Number of objects in llc_objects array if it was initialized. */
57 uint32_t llc_objects_count;
/* Array sized by __llapi_comp_objects_realloc(); NULL while empty. */
58 struct lov_user_ost_data_v1 *llc_objects;
59 /* fields used only for composite layouts */
60 struct lu_extent llc_extent; /* [start, end) of component */
61 uint32_t llc_id; /* unique ID of component */
62 uint32_t llc_flags; /* LCME_FL_* flags */
63 uint64_t llc_timestamp; /* snapshot timestamp */
64 struct list_head llc_list; /* linked to the llapi_layout
69 * An Opaque data type abstracting the layout of a Lustre file.
/* Checked against LLAPI_LAYOUT_MAGIC before a layout is used. */
72 uint32_t llot_magic; /* LLAPI_LAYOUT_MAGIC */
/* Set when the layout was read from a LOV_MAGIC_COMP_V1 xattr. */
75 bool llot_is_composite;
/* Starts at 1; converted to/from lcm_mirror_count by adding/subtracting 1. */
76 uint16_t llot_mirror_count;
77 /* Cursor pointing to one of the components in llot_comp_list */
78 struct llapi_layout_comp *llot_cur_comp;
/* List of struct llapi_layout_comp, linked through llc_list. */
79 struct list_head llot_comp_list;
83 * Compute the number of elements in the lmm_objects array of \a lum
84 * with size \a lum_size.
86 * \param[in] lum the struct lov_user_md to check
87 * \param[in] lum_size the number of bytes in \a lum
89 * \retval number of elements in array lum->lmm_objects
91 static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
96 if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
99 if (lum->lmm_magic == __swab32(LOV_MAGIC_V1) ||
100 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
101 magic = __swab32(lum->lmm_magic);
103 magic = lum->lmm_magic;
105 base_size = lov_user_md_size(0, magic);
107 if (lum_size <= base_size)
110 return (lum_size - base_size) / sizeof(lum->lmm_objects[0]);
114 * Byte-swap the fields of struct lov_user_md.
116 * XXX Rather than duplicating swabbing code here, we should eventually
117 * refactor the needed functions in lustre/ptlrpc/pack_generic.c
118 * into a library that can be shared between kernel and user code.
/* Byte-swap the fields of \a lum in place.  \a lum may be a plain V1/V3
 * layout or a composite (COMP_V1) layout of \a lum_size bytes. */
121 llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
123 int i, j, ent_count, obj_count;
124 struct lov_comp_md_v1 *comp_v1 = NULL;
125 struct lov_comp_md_entry_v1 *ent;
126 struct lov_user_ost_data *lod;
/* Only magics that are in the opposite byte order are of interest.
 * NOTE(review): the statement guarded by this test is not visible here;
 * presumably an early return. */
128 if (lum->lmm_magic != __swab32(LOV_MAGIC_V1) &&
129 lum->lmm_magic != __swab32(LOV_MAGIC_V3) &&
130 lum->lmm_magic != __swab32(LOV_MAGIC_COMP_V1))
/* A composite layout begins with a struct lov_comp_md_v1 header. */
133 if (lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
134 comp_v1 = (struct lov_comp_md_v1 *)lum;
/* Swab the composite header first so lcm_entry_count can be read. */
136 if (comp_v1 != NULL) {
137 __swab32s(&comp_v1->lcm_magic);
138 __swab32s(&comp_v1->lcm_size);
139 __swab32s(&comp_v1->lcm_layout_gen);
140 __swab16s(&comp_v1->lcm_flags);
141 __swab16s(&comp_v1->lcm_entry_count);
142 ent_count = comp_v1->lcm_entry_count;
/* For a composite layout, each pass swabs one entry and then re-points
 * lum/lum_size at the blob that entry describes. */
147 for (i = 0; i < ent_count; i++) {
148 if (comp_v1 != NULL) {
149 ent = &comp_v1->lcm_entries[i];
150 __swab32s(&ent->lcme_id);
151 __swab32s(&ent->lcme_flags);
152 __swab64s(&ent->lcme_timestamp);
153 __swab64s(&ent->lcme_extent.e_start);
154 __swab64s(&ent->lcme_extent.e_end);
155 __swab32s(&ent->lcme_offset);
156 __swab32s(&ent->lcme_size);
158 lum = (struct lov_user_md *)((char *)comp_v1 +
160 lum_size = ent->lcme_size;
/* Counted while lmm_magic is still byte-swapped;
 * llapi_layout_objects_in_lum() accepts a swabbed V1/V3 magic. */
162 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
164 __swab32s(&lum->lmm_magic);
165 __swab32s(&lum->lmm_pattern);
166 __swab32s(&lum->lmm_stripe_size);
167 __swab16s(&lum->lmm_stripe_count);
168 __swab16s(&lum->lmm_stripe_offset);
/* lmm_magic is in native order from here on; anything other than V1 is
 * accessed through the V3 structure to locate its object array. */
170 if (lum->lmm_magic != LOV_MAGIC_V1) {
171 struct lov_user_md_v3 *v3;
172 v3 = (struct lov_user_md_v3 *)lum;
173 lod = v3->lmm_objects;
175 lod = lum->lmm_objects;
/* Only the OST index of each object is swabbed. */
178 for (j = 0; j < obj_count; j++)
179 __swab32s(&lod[j].l_ost_idx);
184 * (Re-)allocate llc_objects[] to \a num_stripes stripes.
186 * Copy over existing llc_objects[], if any, to the new llc_objects[].
188 * \param[in] layout existing layout to be modified
189 * \param[in] num_stripes number of stripes in new layout
191 * \retval 0 if the objects are re-allocated successfully
192 * \retval -1 on error with errno set
194 static int __llapi_comp_objects_realloc(struct llapi_layout_comp *comp,
195 unsigned int new_stripes)
197 struct lov_user_ost_data_v1 *new_objects;
200 if (new_stripes > LOV_MAX_STRIPE_COUNT) {
205 if (new_stripes == comp->llc_objects_count)
208 if (new_stripes != 0 && new_stripes <= comp->llc_objects_count)
211 new_objects = realloc(comp->llc_objects,
212 sizeof(*new_objects) * new_stripes);
213 if (new_objects == NULL && new_stripes != 0) {
218 for (i = comp->llc_objects_count; i < new_stripes; i++)
219 new_objects[i].l_ost_idx = LLAPI_LAYOUT_IDX_MAX;
221 comp->llc_objects = new_objects;
222 comp->llc_objects_count = new_stripes;
228 * Allocate storage for a llapi_layout_comp with \a num_stripes stripes.
230 * \param[in] num_stripes number of stripes in new layout
232 * \retval valid pointer if allocation succeeds
233 * \retval NULL if allocation fails
/* Allocate a zero-filled component, size its llc_objects array for
 * \a num_stripes entries and set the striping attributes to defaults. */
235 static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
237 struct llapi_layout_comp *comp;
239 if (num_stripes > LOV_MAX_STRIPE_COUNT) {
244 comp = calloc(1, sizeof(*comp));
250 comp->llc_objects = NULL;
251 comp->llc_objects_count = 0;
/* Grows llc_objects from empty to num_stripes entries. */
253 if (__llapi_comp_objects_realloc(comp, num_stripes) < 0) {
/* Unspecified attributes are marked with LLAPI_LAYOUT_DEFAULT. */
259 comp->llc_pattern = LLAPI_LAYOUT_DEFAULT;
260 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
261 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
262 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
263 comp->llc_pool_name[0] = '\0';
/* By default the component spans the extent [0, LUSTRE_EOF). */
264 comp->llc_extent.e_start = 0;
265 comp->llc_extent.e_end = LUSTRE_EOF;
/* Not yet linked into any layout's component list. */
268 INIT_LIST_HEAD(&comp->llc_list);
274 * Free memory allocated for \a comp
276 * \param[in] comp previously allocated by __llapi_comp_alloc()
278 static void __llapi_comp_free(struct llapi_layout_comp *comp)
280 if (comp->llc_objects != NULL)
281 free(comp->llc_objects);
286 * Free memory allocated for \a layout.
288 * \param[in] layout previously allocated by llapi_layout_alloc()
290 void llapi_layout_free(struct llapi_layout *layout)
292 struct llapi_layout_comp *comp, *n;
297 list_for_each_entry_safe(comp, n, &layout->llot_comp_list, llc_list) {
298 list_del_init(&comp->llc_list);
299 __llapi_comp_free(comp);
305 * Allocate and initialize a llapi_layout structure.
307 * \retval valid llapi_layout pointer on success
308 * \retval NULL if memory allocation fails
310 static struct llapi_layout *__llapi_layout_alloc(void)
312 struct llapi_layout *layout;
314 layout = calloc(1, sizeof(*layout));
315 if (layout == NULL) {
321 layout->llot_magic = LLAPI_LAYOUT_MAGIC;
322 layout->llot_gen = 0;
323 layout->llot_flags = 0;
324 layout->llot_is_composite = false;
325 layout->llot_mirror_count = 1;
326 layout->llot_cur_comp = NULL;
327 INIT_LIST_HEAD(&layout->llot_comp_list);
333 * Allocate and initialize a new plain layout.
335 * \retval valid llapi_layout pointer on success
336 * \retval NULL if memory allocation fails
338 struct llapi_layout *llapi_layout_alloc(void)
340 struct llapi_layout_comp *comp;
341 struct llapi_layout *layout;
343 layout = __llapi_layout_alloc();
347 comp = __llapi_comp_alloc(0);
353 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
354 layout->llot_cur_comp = comp;
360 * Convert the data from a lov_user_md to a newly allocated llapi_layout.
361 * The caller is responsible for freeing the returned pointer.
363 * \param[in] lov_xattr LOV user metadata xattr to copy data from
364 * \param[in] lov_xattr_size size in bytes of \a lov_xattr
366 * \retval valid llapi_layout pointer on success
367 * \retval NULL if memory allocation fails
/* Build a llapi_layout from a LOV xattr buffer: one llapi_layout_comp is
 * appended per processed entry, and the last one becomes the current
 * component. */
369 struct llapi_layout *llapi_layout_get_by_xattr(const void *lov_xattr,
370 ssize_t lov_xattr_size)
372 const struct lov_user_md *lum = lov_xattr;
373 struct lov_comp_md_v1 *comp_v1 = NULL;
374 struct lov_comp_md_entry_v1 *ent;
375 struct lov_user_md *v1;
376 struct llapi_layout *layout;
377 struct llapi_layout_comp *comp;
378 int i, ent_count = 0, obj_count;
380 layout = __llapi_layout_alloc();
/* Composite layout: take the header fields from lov_comp_md_v1.
 * NOTE(review): llot_gen is assigned the same value twice below. */
384 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
385 comp_v1 = (struct lov_comp_md_v1 *)lum;
386 ent_count = comp_v1->lcm_entry_count;
387 layout->llot_gen = comp_v1->lcm_layout_gen;
388 layout->llot_is_composite = true;
/* The xattr stores the mirror count minus one. */
389 layout->llot_mirror_count = comp_v1->lcm_mirror_count + 1;
390 layout->llot_gen = comp_v1->lcm_layout_gen;
391 layout->llot_flags = comp_v1->lcm_flags;
392 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
393 lum->lmm_magic == LOV_MAGIC_V3) {
395 layout->llot_is_composite = false;
397 if (lov_xattr_size <= 0) {
406 if (ent_count == 0) {
411 v1 = (struct lov_user_md *)lum;
412 for (i = 0; i < ent_count; i++) {
/* For a composite layout, v1 and lov_xattr_size are re-pointed at the
 * blob described by entry i. */
413 if (comp_v1 != NULL) {
414 ent = &comp_v1->lcm_entries[i];
415 v1 = (struct lov_user_md *)((char *)comp_v1 +
417 lov_xattr_size = ent->lcme_size;
/* Size llc_objects to hold every object present in the blob. */
422 obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
423 comp = __llapi_comp_alloc(obj_count);
428 comp->llc_extent.e_start = ent->lcme_extent.e_start;
429 comp->llc_extent.e_end = ent->lcme_extent.e_end;
430 comp->llc_id = ent->lcme_id;
431 comp->llc_flags = ent->lcme_flags;
/* The timestamp is only copied for LCME_FL_NOSYNC components. */
432 if (comp->llc_flags & LCME_FL_NOSYNC)
433 comp->llc_timestamp = ent->lcme_timestamp;
435 comp->llc_extent.e_start = 0;
436 comp->llc_extent.e_end = LUSTRE_EOF;
441 if (v1->lmm_pattern == LOV_PATTERN_RAID0)
442 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
444 /* Lustre only supports RAID0 for now. */
445 comp->llc_pattern = v1->lmm_pattern;
/* A stripe size of 0 in the xattr means "use the default". */
447 if (v1->lmm_stripe_size == 0)
448 comp->llc_stripe_size = LLAPI_LAYOUT_DEFAULT;
450 comp->llc_stripe_size = v1->lmm_stripe_size;
/* An all-ones stripe count maps to LLAPI_LAYOUT_WIDE, 0 to the default. */
452 if (v1->lmm_stripe_count == (typeof(v1->lmm_stripe_count))-1)
453 comp->llc_stripe_count = LLAPI_LAYOUT_WIDE;
454 else if (v1->lmm_stripe_count == 0)
455 comp->llc_stripe_count = LLAPI_LAYOUT_DEFAULT;
457 comp->llc_stripe_count = v1->lmm_stripe_count;
/* An all-ones stripe offset means "use the default". */
459 if (v1->lmm_stripe_offset ==
460 (typeof(v1->lmm_stripe_offset))-1)
461 comp->llc_stripe_offset = LLAPI_LAYOUT_DEFAULT;
463 comp->llc_stripe_offset = v1->lmm_stripe_offset;
/* Anything other than V1 is read through the V3 structure, which also
 * carries a pool name; snprintf() keeps llc_pool_name NUL-terminated. */
465 if (v1->lmm_magic != LOV_USER_MAGIC_V1) {
466 const struct lov_user_md_v3 *lumv3;
467 lumv3 = (struct lov_user_md_v3 *)v1;
468 snprintf(comp->llc_pool_name,
469 sizeof(comp->llc_pool_name),
470 "%s", lumv3->lmm_pool_name);
471 memcpy(comp->llc_objects, lumv3->lmm_objects,
472 obj_count * sizeof(lumv3->lmm_objects[0]));
474 const struct lov_user_md_v1 *lumv1;
475 lumv1 = (struct lov_user_md_v1 *)v1;
476 memcpy(comp->llc_objects, lumv1->lmm_objects,
477 obj_count * sizeof(lumv1->lmm_objects[0]));
/* NOTE(review): the condition guarding this assignment is not visible
 * here; it overrides the offset with the first object's OST index. */
481 comp->llc_stripe_offset =
482 comp->llc_objects[0].l_ost_idx;
484 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
485 layout->llot_cur_comp = comp;
/* Failure path: releases the layout and any components added so far. */
490 llapi_layout_free(layout);
495 * Convert the data from a llapi_layout to a newly allocated lov_user_md.
496 * The caller is responsible for freeing the returned pointer.
498 * \param[in] layout the layout to copy from
500 * \retval valid lov_user_md pointer on success
501 * \retval NULL if memory allocation fails or the layout is invalid
/* Serialise \a layout into one heap buffer: an optional lov_comp_md_v1
 * header with its entry table, followed by one lov_user_md blob per
 * component. */
503 static struct lov_user_md *
504 llapi_layout_to_lum(const struct llapi_layout *layout)
506 struct llapi_layout_comp *comp;
507 struct lov_comp_md_v1 *comp_v1 = NULL;
508 struct lov_comp_md_entry_v1 *ent;
509 struct lov_user_md *lum = NULL;
514 if (layout == NULL ||
515 list_empty((struct list_head *)&layout->llot_comp_list)) {
520 /* Allocate header of lov_comp_md_v1 if necessary */
521 if (layout->llot_is_composite) {
524 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
/* Header plus one entry descriptor per component; blobs come later. */
527 lum_size = sizeof(*comp_v1) + comp_cnt * sizeof(*ent);
528 lum = calloc(lum_size, 1);
533 comp_v1 = (struct lov_comp_md_v1 *)lum;
534 comp_v1->lcm_magic = LOV_USER_MAGIC_COMP_V1;
535 comp_v1->lcm_size = lum_size;
536 comp_v1->lcm_layout_gen = 0;
537 comp_v1->lcm_flags = layout->llot_flags;
538 comp_v1->lcm_entry_count = comp_cnt;
/* The xattr stores the mirror count minus one. */
539 comp_v1->lcm_mirror_count = layout->llot_mirror_count - 1;
543 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
544 struct lov_user_md *blob;
547 int i, obj_count = 0;
548 struct lov_user_ost_data *lmm_objects;
549 uint64_t pattern = comp->llc_pattern;
/* Pick the blob magic: SPECIFIC when OSTs were chosen explicitly (which
 * needs at least llc_stripe_count objects), V3 when a pool name is set,
 * V1 otherwise. */
551 if ((pattern & LLAPI_LAYOUT_SPECIFIC) != 0) {
552 if (comp->llc_objects_count <
553 comp->llc_stripe_count) {
557 magic = LOV_USER_MAGIC_SPECIFIC;
558 obj_count = comp->llc_stripe_count;
559 pattern &= ~LLAPI_LAYOUT_SPECIFIC;
560 } else if (strlen(comp->llc_pool_name) != 0) {
561 magic = LOV_USER_MAGIC_V3;
563 magic = LOV_USER_MAGIC_V1;
565 /* All stripes must be specified when the pattern contains
566 * LLAPI_LAYOUT_SPECIFIC */
567 for (i = 0; i < obj_count; i++) {
568 if (comp->llc_objects[i].l_ost_idx ==
569 LLAPI_LAYOUT_IDX_MAX) {
/* Grow the buffer by one blob.  realloc() may move it, so comp_v1 is
 * refreshed and the new blob is located at the old end of the buffer. */
575 blob_size = lov_user_md_size(obj_count, magic);
576 blob = realloc(lum, lum_size + blob_size);
582 comp_v1 = (struct lov_comp_md_v1 *)lum;
583 blob = (struct lov_user_md *)((char *)lum + lum_size);
584 lum_size += blob_size;
/* Translate LLAPI_LAYOUT_* sentinels back to their on-wire encodings. */
587 blob->lmm_magic = magic;
588 if (pattern == LLAPI_LAYOUT_DEFAULT)
589 blob->lmm_pattern = LOV_PATTERN_RAID0;
590 else if (pattern == LLAPI_LAYOUT_MDT)
591 blob->lmm_pattern = LOV_PATTERN_MDT;
593 blob->lmm_pattern = pattern;
595 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
596 blob->lmm_stripe_size = 0;
598 blob->lmm_stripe_size = comp->llc_stripe_size;
600 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
601 blob->lmm_stripe_count = 0;
602 else if (comp->llc_stripe_count == LLAPI_LAYOUT_WIDE)
603 blob->lmm_stripe_count = LOV_ALL_STRIPES;
605 blob->lmm_stripe_count = comp->llc_stripe_count;
607 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
608 blob->lmm_stripe_offset = -1;
610 blob->lmm_stripe_offset = comp->llc_stripe_offset;
/* V3 and SPECIFIC blobs carry a pool name ahead of the object array. */
612 if (magic == LOV_USER_MAGIC_V3 ||
613 magic == LOV_USER_MAGIC_SPECIFIC) {
614 struct lov_user_md_v3 *lumv3 =
615 (struct lov_user_md_v3 *)blob;
/* NOTE(review): strncpy() does not NUL-terminate when the name fills the
 * whole lmm_pool_name field. */
617 if (comp->llc_pool_name[0] != '\0') {
618 strncpy(lumv3->lmm_pool_name,
620 sizeof(lumv3->lmm_pool_name));
622 memset(lumv3->lmm_pool_name, 0,
623 sizeof(lumv3->lmm_pool_name));
625 lmm_objects = lumv3->lmm_objects;
627 lmm_objects = blob->lmm_objects;
/* Only the OST index of each object is written. */
630 for (i = 0; i < obj_count; i++)
631 lmm_objects[i].l_ost_idx =
632 comp->llc_objects[i].l_ost_idx;
/* Composite layouts also record where this blob lives and what extent
 * it covers, and grow the total size in the header. */
634 if (layout->llot_is_composite) {
635 ent = &comp_v1->lcm_entries[ent_idx];
636 ent->lcme_id = comp->llc_id;
637 ent->lcme_flags = comp->llc_flags;
638 if (ent->lcme_flags & LCME_FL_NOSYNC)
639 ent->lcme_timestamp = comp->llc_timestamp;
640 ent->lcme_extent.e_start = comp->llc_extent.e_start;
641 ent->lcme_extent.e_end = comp->llc_extent.e_end;
642 ent->lcme_size = blob_size;
643 ent->lcme_offset = offset;
645 comp_v1->lcm_size += blob_size;
/**
 * Get the parent directory of a path.
 *
 * The result is always NUL-terminated; a \a path that does not fit in
 * \a buf is truncated before the parent is computed.  A \a path without
 * any '/' yields ".", and a \a path directly below the root yields "/".
 *
 * \param[in] path	path to get parent of
 * \param[out] buf	buffer in which to store parent path
 * \param[in] size	size in bytes of buffer \a buf
 */
static void get_parent_dir(const char *path, char *buf, size_t size)
{
	char *slash;

	if (size == 0)
		return;

	/* Unlike strncpy(), snprintf() terminates buf even when truncating,
	 * so the strrchr() below never runs past the end of buf. */
	snprintf(buf, size, "%s", path);

	slash = strrchr(buf, '/');
	if (slash == buf) {
		/* Parent is the root: keep "/" instead of an empty string.
		 * buf[0] is '/', hence size >= 2 and buf[1] is in bounds. */
		buf[1] = '\0';
	} else if (slash != NULL) {
		*slash = '\0';
	} else if (size >= 2) {
		/* No directory part at all: the parent is the cwd. */
		buf[0] = '.';
		buf[1] = '\0';
	}
}
681 * Substitute unspecified attribute values in \a layout with values
682 * from fs global settings. (lov.stripesize, lov.stripecount,
685 * \param[in,out] layout layout whose unspecified attributes are filled in
686 * \param[in] path file path of the filesystem
/* For every component of \a layout, replace each attribute that is still
 * LLAPI_LAYOUT_DEFAULT with the value obtained for \a path from
 * sattr_cache_get_defaults(); a default pattern becomes RAID0. */
688 static void inherit_sys_attributes(struct llapi_layout *layout,
691 struct llapi_layout_comp *comp;
692 unsigned int ssize, scount, soffset;
695 rc = sattr_cache_get_defaults(NULL, path, &scount, &ssize, &soffset);
/* Attributes that were set explicitly are left untouched. */
699 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
700 if (comp->llc_pattern == LLAPI_LAYOUT_DEFAULT)
701 comp->llc_pattern = LLAPI_LAYOUT_RAID0;
702 if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
703 comp->llc_stripe_size = ssize;
704 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT)
705 comp->llc_stripe_count = scount;
706 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
707 comp->llc_stripe_offset = soffset;
712 * Get the current component of \a layout.
714 * \param[in] layout layout to get current component
716 * \retval valid llapi_layout_comp pointer on success
717 * \retval NULL on error
719 static struct llapi_layout_comp *
/* Validate \a layout (non-NULL, correct magic, cursor set) and look the
 * cursor up in llot_comp_list before handing it out. */
720 __llapi_layout_cur_comp(const struct llapi_layout *layout)
722 struct llapi_layout_comp *comp;
724 if (layout == NULL || layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
728 if (layout->llot_cur_comp == NULL) {
732 /* Verify data consistency */
/* The cursor must be one of the components actually on the list. */
733 list_for_each_entry(comp, &layout->llot_comp_list, llc_list)
734 if (comp == layout->llot_cur_comp)
741 * Test if any attributes of \a layout are specified.
743 * \param[in] layout the layout to check
745 * \retval true any attributes are specified
746 * \retval false all attributes are unspecified
/* Report whether the current component of \a layout has any attribute
 * other than LLAPI_LAYOUT_DEFAULT or a non-empty pool name. */
748 static bool is_any_specified(const struct llapi_layout *layout)
750 struct llapi_layout_comp *comp;
752 comp = __llapi_layout_cur_comp(layout);
/* Composite or mirrored layouts are tested separately from the
 * per-attribute checks below. */
756 if (layout->llot_is_composite || layout->llot_mirror_count != 1)
/* strlen() is non-zero exactly when a pool name has been set. */
759 return comp->llc_pattern != LLAPI_LAYOUT_DEFAULT ||
760 comp->llc_stripe_size != LLAPI_LAYOUT_DEFAULT ||
761 comp->llc_stripe_count != LLAPI_LAYOUT_DEFAULT ||
762 comp->llc_stripe_offset != LLAPI_LAYOUT_DEFAULT ||
763 strlen(comp->llc_pool_name);
767 * Check if the given \a lum_size is large enough to hold the required
770 * \param[in] lum the struct lov_user_md to check
771 * \param[in] lum_size the number of bytes in \a lum
773 * \retval true the \a lum_size is too small
774 * \retval false the \a lum_size is large enough
/* Check whether \a lum_size bytes are enough to hold the fixed-size
 * header that corresponds to the magic found in \a lum. */
776 static bool llapi_layout_lum_truncated(struct lov_user_md *lum, size_t lum_size)
/* The magic itself must be readable before it can be inspected. */
780 if (lum_size < sizeof(lum->lmm_magic))
/* Accept each known magic in either byte order. */
783 if (lum->lmm_magic == LOV_MAGIC_V1 ||
784 lum->lmm_magic == __swab32(LOV_MAGIC_V1))
785 magic = LOV_MAGIC_V1;
786 else if (lum->lmm_magic == LOV_MAGIC_V3 ||
787 lum->lmm_magic == __swab32(LOV_MAGIC_V3))
788 magic = LOV_MAGIC_V3;
789 else if (lum->lmm_magic == LOV_MAGIC_COMP_V1 ||
790 lum->lmm_magic == __swab32(LOV_MAGIC_COMP_V1))
791 magic = LOV_MAGIC_COMP_V1;
/* Plain layouts need a header with zero objects; composite layouts need
 * at least the lov_comp_md_v1 header. */
795 if (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3)
796 return lum_size < lov_user_md_size(0, magic);
798 return lum_size < sizeof(struct lov_comp_md_v1);
801 /* Verify if the objects count in lum is consistent with the
802 * stripe count in lum. It applies to regular file only. */
/* Compares, for each blob in \a lum, the number of objects that fit in the
 * blob with what the layout claims.  Expects native byte order. */
803 static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
805 struct lov_comp_md_v1 *comp_v1 = NULL;
806 int i, ent_count, obj_count;
808 if (lum->lmm_magic == LOV_MAGIC_COMP_V1) {
809 comp_v1 = (struct lov_comp_md_v1 *)lum;
810 ent_count = comp_v1->lcm_entry_count;
811 } else if (lum->lmm_magic == LOV_MAGIC_V1 ||
812 lum->lmm_magic == LOV_MAGIC_V3) {
818 for (i = 0; i < ent_count; i++) {
/* Composite: examine the blob that entry i points at. */
820 lum = (struct lov_user_md *)((char *)comp_v1 +
821 comp_v1->lcm_entries[i].lcme_offset);
822 lum_size = comp_v1->lcm_entries[i].lcme_size;
824 obj_count = llapi_layout_objects_in_lum(lum, lum_size);
/* An entry without LCME_FL_INIT is tested for having any objects at all;
 * otherwise the object count is compared with lmm_stripe_count. */
827 if (!(comp_v1->lcm_entries[i].lcme_flags &
828 LCME_FL_INIT) && obj_count != 0)
830 } else if (obj_count != lum->lmm_stripe_count) {
838 * Get the striping layout for the file referenced by file descriptor \a fd.
840 * If the filesystem does not support the "lustre." xattr namespace, the
841 * file must be on a non-Lustre filesystem, so set errno to ENOTTY per
842 * convention. If the file has no "lustre.lov" data, the file will
843 * inherit default values, so return a default layout.
845 * If the kernel gives us back less than the expected amount of data,
846 * we fail with errno set to EINTR.
848 * \param[in] fd open file descriptor
849 * \param[in] flags flags to control how layout is retrieved
851 * \retval valid llapi_layout pointer on success
852 * \retval NULL if an error occurs
/* Read the XATTR_LUSTRE_LOV xattr of \a fd into a temporary buffer,
 * sanity-check and byte-swap it, then convert it with
 * llapi_layout_get_by_xattr(). */
854 struct llapi_layout *llapi_layout_get_by_fd(int fd, uint32_t flags)
857 struct lov_user_md *lum;
858 struct llapi_layout *layout = NULL;
/* XATTR_SIZE_MAX bytes are requested so any xattr value fits. */
862 lum_len = XATTR_SIZE_MAX;
863 lum = malloc(lum_len);
867 bytes_read = fgetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_len);
868 if (bytes_read < 0) {
869 if (errno == EOPNOTSUPP)
/* No LOV xattr at all: hand back an all-default layout. */
871 else if (errno == ENODATA)
872 layout = llapi_layout_alloc();
876 /* Return an error if we got back a partial layout. */
877 if (llapi_layout_lum_truncated(lum, bytes_read)) {
/* Convert to native byte order before the fields are interpreted. */
882 llapi_layout_swab_lov_user_md(lum, bytes_read);
884 /* Directories may have a positive non-zero lum->lmm_stripe_count
885 * yet have an empty lum->lmm_objects array. For non-directories the
886 * amount of data returned from the kernel must be consistent
887 * with the stripe count. */
888 if (fstat(fd, &st) < 0)
891 if (!S_ISDIR(st.st_mode) && !llapi_layout_lum_valid(lum, bytes_read)) {
896 layout = llapi_layout_get_by_xattr(lum, bytes_read);
903 * Get the expected striping layout for a file at \a path.
905 * Substitute expected inherited attribute values for unspecified
906 * attributes. Unspecified attributes may belong to directories and
907 * never-written-to files, and indicate that default values will be
908 * assigned when files are created or first written to. A default value
909 * is inherited from the parent directory if the attribute is specified
910 * there, otherwise it is inherited from the filesystem root.
911 * Unspecified attributes normally have the value LLAPI_LAYOUT_DEFAULT.
913 * The complete \a path need not refer to an existing file or directory,
914 * but some leading portion of it must reside within a lustre filesystem.
915 * A use case for this interface would be to obtain the literal striping
916 * values that would be assigned to a new file in a given directory.
918 * \param[in] path path for which to get the expected layout
920 * \retval valid llapi_layout pointer on success
921 * \retval NULL if an error occurs
/* Look for a layout with explicitly specified attributes, trying in turn
 * \a path itself, its parent directory and the filesystem root, and fill
 * the remaining defaults via inherit_sys_attributes(). */
923 static struct llapi_layout *llapi_layout_expected(const char *path)
925 struct llapi_layout *path_layout = NULL;
926 char donor_path[PATH_MAX];
/* A missing file (ENOENT) is singled out from other open() failures. */
931 fd = open(path, O_RDONLY);
932 if (fd < 0 && errno != ENOENT)
938 path_layout = llapi_layout_get_by_fd(fd, 0);
/* ENODATA and ENOENT are singled out here; an empty default layout is
 * allocated below for the checks that follow. */
944 if (path_layout == NULL) {
945 if (errno != ENODATA && errno != ENOENT)
948 path_layout = llapi_layout_alloc();
949 if (path_layout == NULL)
953 if (is_any_specified(path_layout)) {
954 inherit_sys_attributes(path_layout, path);
/* Nothing specified on \a path itself: drop it and look further up. */
958 llapi_layout_free(path_layout);
960 rc = stat(path, &st);
961 if (rc < 0 && errno != ENOENT)
964 /* If path is not a directory or doesn't exist, inherit layout
965 * from parent directory. */
966 if ((rc == 0 && !S_ISDIR(st.st_mode)) ||
967 (rc < 0 && errno == ENOENT)) {
968 get_parent_dir(path, donor_path, sizeof(donor_path));
969 path_layout = llapi_layout_get_by_path(donor_path, 0);
970 if (path_layout != NULL) {
971 if (is_any_specified(path_layout)) {
972 inherit_sys_attributes(path_layout, donor_path);
975 llapi_layout_free(path_layout);
979 /* Inherit layout from the filesystem root. */
/* donor_path is overwritten with the path found by llapi_search_mounts(). */
980 rc = llapi_search_mounts(path, 0, donor_path, NULL);
983 path_layout = llapi_layout_get_by_path(donor_path, 0);
984 if (path_layout == NULL)
987 inherit_sys_attributes(path_layout, donor_path);
992 * Get the striping layout for the file at \a path.
994 * If \a flags contains LAYOUT_GET_EXPECTED, substitute
995 * expected inherited attribute values for unspecified attributes. See
996 * llapi_layout_expected().
998 * \param[in] path path for which to get the layout
999 * \param[in] flags flags to control how layout is retrieved
1001 * \retval valid llapi_layout pointer on success
1002 * \retval NULL if an error occurs
/* Open \a path read-only and read its layout through
 * llapi_layout_get_by_fd(), unless the expected layout was requested. */
1004 struct llapi_layout *llapi_layout_get_by_path(const char *path, uint32_t flags)
1006 struct llapi_layout *layout = NULL;
/* LAYOUT_GET_EXPECTED is handled entirely by llapi_layout_expected(). */
1010 if (flags & LAYOUT_GET_EXPECTED)
1011 return llapi_layout_expected(path);
1013 fd = open(path, O_RDONLY);
1017 layout = llapi_layout_get_by_fd(fd, flags);
1026 * Get the layout for the file with FID \a fid in filesystem \a lustre_dir.
1028 * \param[in] lustre_dir path within Lustre filesystem containing \a fid
1029 * \param[in] fid Lustre identifier of file to get layout for
1031 * \retval valid llapi_layout pointer on success
1032 * \retval NULL if an error occurs
/* Open the file identified by \a fid with llapi_open_by_fid() and read
 * its layout through llapi_layout_get_by_fd(). */
1034 struct llapi_layout *llapi_layout_get_by_fid(const char *lustre_dir,
1035 const struct lu_fid *fid,
1040 int saved_msg_level = llapi_msg_get_level();
1041 struct llapi_layout *layout = NULL;
1043 /* Prevent llapi internal routines from writing to console
1044 * while executing this function, then restore previous message
1046 llapi_msg_set_level(LLAPI_MSG_OFF);
1047 fd = llapi_open_by_fid(lustre_dir, fid, O_RDONLY);
1048 llapi_msg_set_level(saved_msg_level);
1053 layout = llapi_layout_get_by_fd(fd, flags);
1062 * Get the stripe count of \a layout.
1064 * \param[in] layout layout to get stripe count from
1065 * \param[out] count integer to store stripe count in
1067 * \retval 0 on success
1068 * \retval -1 if arguments are invalid
/* Copy the stripe count of the layout's current component to *count. */
1070 int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
1073 struct llapi_layout_comp *comp;
1075 comp = __llapi_layout_cur_comp(layout);
/* The output pointer is validated before it is written through. */
1079 if (count == NULL) {
1084 *count = comp->llc_stripe_count;
1090 * The llapi_layout API functions have these extra validity checks since
1091 * they use intuitively named macros to denote special behavior, whereas
1092 * the old API uses 0 and -1.
1095 static bool llapi_layout_stripe_count_is_valid(int64_t stripe_count)
1097 return stripe_count == LLAPI_LAYOUT_DEFAULT ||
1098 stripe_count == LLAPI_LAYOUT_WIDE ||
1099 (stripe_count != 0 && stripe_count != -1 &&
1100 llapi_stripe_count_is_valid(stripe_count));
1103 static bool llapi_layout_stripe_size_is_valid(uint64_t stripe_size)
1105 return stripe_size == LLAPI_LAYOUT_DEFAULT ||
1106 (stripe_size != 0 &&
1107 llapi_stripe_size_is_aligned(stripe_size) &&
1108 !llapi_stripe_size_is_too_big(stripe_size));
1111 static bool llapi_layout_stripe_index_is_valid(int64_t stripe_index)
1113 return stripe_index == LLAPI_LAYOUT_DEFAULT ||
1114 (stripe_index >= 0 &&
1115 llapi_stripe_index_is_valid(stripe_index));
1119 * Set the stripe count of \a layout.
1121 * \param[in] layout layout to set stripe count in
1122 * \param[in] count value to be set
1124 * \retval 0 on success
1125 * \retval -1 if arguments are invalid
/* Store \a count as the stripe count of the layout's current component. */
1127 int llapi_layout_stripe_count_set(struct llapi_layout *layout,
1130 struct llapi_layout_comp *comp;
1132 comp = __llapi_layout_cur_comp(layout);
/* Only the LLAPI_LAYOUT_* sentinels or a real stripe count pass. */
1136 if (!llapi_layout_stripe_count_is_valid(count)) {
1141 comp->llc_stripe_count = count;
1147 * Get the stripe size of \a layout.
1149 * \param[in] layout layout to get stripe size from
1150 * \param[out] size integer to store stripe size in
1152 * \retval 0 on success
1153 * \retval -1 if arguments are invalid
/* Copy the stripe size of the layout's current component to *size. */
1155 int llapi_layout_stripe_size_get(const struct llapi_layout *layout,
1158 struct llapi_layout_comp *comp;
1160 comp = __llapi_layout_cur_comp(layout);
1169 *size = comp->llc_stripe_size;
1175 * Set the stripe size of \a layout.
1177 * \param[in] layout layout to set stripe size in
1178 * \param[in] size value to be set
1180 * \retval 0 on success
1181 * \retval -1 if arguments are invalid
/* Store \a size as the stripe size of the layout's current component. */
1183 int llapi_layout_stripe_size_set(struct llapi_layout *layout,
1186 struct llapi_layout_comp *comp;
1188 comp = __llapi_layout_cur_comp(layout);
/* Only LLAPI_LAYOUT_DEFAULT or an aligned, non-zero, not-too-big size
 * passes. */
1192 if (!llapi_layout_stripe_size_is_valid(size)) {
1197 comp->llc_stripe_size = size;
1203 * Get the RAID pattern of \a layout.
1205 * \param[in] layout layout to get pattern from
1206 * \param[out] pattern integer to store pattern in
1208 * \retval 0 on success
1209 * \retval -1 if arguments are invalid
/* Copy the pattern of the layout's current component to *pattern. */
1211 int llapi_layout_pattern_get(const struct llapi_layout *layout,
1214 struct llapi_layout_comp *comp;
1216 comp = __llapi_layout_cur_comp(layout);
/* The output pointer is validated before it is written through. */
1220 if (pattern == NULL) {
1225 *pattern = comp->llc_pattern;
1231 * Set the pattern of \a layout.
1233 * \param[in] layout layout to set pattern in
1234 * \param[in] pattern value to be set
1236 * \retval 0 on success
1237 * \retval -1 if arguments are invalid or RAID pattern
/* Set the pattern of the layout's current component to \a pattern. */
1240 int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
1242 struct llapi_layout_comp *comp;
1244 comp = __llapi_layout_cur_comp(layout);
/* Only the default, RAID0 and MDT patterns are accepted. */
1248 if (pattern != LLAPI_LAYOUT_DEFAULT &&
1249 pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT) {
/* Keep an already-set LLAPI_LAYOUT_SPECIFIC bit across the update. */
1254 comp->llc_pattern = pattern |
1255 (comp->llc_pattern & LLAPI_LAYOUT_SPECIFIC);
1260 static inline int stripe_number_roundup(int stripe_number)
1262 unsigned int round_up = (stripe_number + 8) & ~7;
1263 return round_up > LOV_MAX_STRIPE_COUNT ?
1264 LOV_MAX_STRIPE_COUNT : round_up;
1268 * Set the OST index of stripe number \a stripe_number to \a ost_index.
1270 * If only the starting stripe's OST index is specified, then this can use
1271 * the normal LOV_MAGIC_{V1,V3} layout type. If multiple OST indices are
1272 * given, then allocate an array to hold the list of indices and ensure that
1273 * the LOV_USER_MAGIC_SPECIFIC layout is used when creating the file.
1275 * \param[in] layout layout to set OST index in
1276 * \param[in] stripe_number stripe number to set index for
1277 * \param[in] ost_index the index to set
1279 * \retval 0 on success
1280 * \retval -1 if arguments are invalid or an unsupported stripe number
1281 * was specified, error returned in errno
/* Set the OST index of stripe \a stripe_number in the layout's current
 * component, growing llc_objects as needed. */
1283 int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
1286 struct llapi_layout_comp *comp;
1288 comp = __llapi_layout_cur_comp(layout);
1292 if (!llapi_layout_stripe_index_is_valid(ost_index)) {
/* Resetting stripe 0 to the default drops every explicit OST choice:
 * the SPECIFIC bit is cleared and llc_objects is released.
 * NOTE(review): the return value of the realloc helper is ignored here. */
1297 if (stripe_number == 0 && ost_index == LLAPI_LAYOUT_DEFAULT) {
1298 comp->llc_stripe_offset = ost_index;
1299 comp->llc_pattern &= ~LLAPI_LAYOUT_SPECIFIC;
1300 __llapi_comp_objects_realloc(comp, 0);
1301 } else if (stripe_number >= 0 &&
1302 stripe_number < LOV_MAX_STRIPE_COUNT) {
1303 if (ost_index >= LLAPI_LAYOUT_IDX_MAX) {
1308 /* Preallocate a few more stripes to avoid realloc() overhead.*/
1309 if (__llapi_comp_objects_realloc(comp,
1310 stripe_number_roundup(stripe_number)) < 0)
1313 comp->llc_objects[stripe_number].l_ost_idx = ost_index;
/* Stripe 0 also defines the component's starting OST. */
1315 if (stripe_number == 0)
1316 comp->llc_stripe_offset = ost_index;
/* NOTE(review): the condition guarding this assignment, if any, is not
 * visible here. */
1318 comp->llc_pattern |= LLAPI_LAYOUT_SPECIFIC;
/* Make sure the stripe count covers the stripe that was just set. */
1320 if (comp->llc_stripe_count == LLAPI_LAYOUT_DEFAULT ||
1321 comp->llc_stripe_count <= stripe_number)
1322 comp->llc_stripe_count = stripe_number + 1;
1332 * Get the OST index associated with stripe \a stripe_number.
1334 * Stripes are indexed starting from zero.
1336 * \param[in] layout layout to get index from
1337 * \param[in] stripe_number stripe number to get index for
1338 * \param[out] index integer to store index in
1340 * \retval 0 on success
1341 * \retval -1 if arguments are invalid
/* Look up the OST index of stripe \a stripe_number in the layout's
 * current component and store it in *index. */
1343 int llapi_layout_ost_index_get(const struct llapi_layout *layout,
1344 uint64_t stripe_number, uint64_t *index)
1346 struct llapi_layout_comp *comp;
1348 comp = __llapi_layout_cur_comp(layout);
1352 if (index == NULL) {
/* The stripe must lie within both the stripe count and the allocated
 * llc_objects array. */
1357 if (stripe_number >= comp->llc_stripe_count ||
1358 stripe_number >= comp->llc_objects_count) {
/* A default stripe offset is reported as LLAPI_LAYOUT_DEFAULT instead of
 * the stored per-object value. */
1363 if (comp->llc_stripe_offset == LLAPI_LAYOUT_DEFAULT)
1364 *index = LLAPI_LAYOUT_DEFAULT;
1366 *index = comp->llc_objects[stripe_number].l_ost_idx;
1373 * Get the pool name of layout \a layout.
1375 * \param[in] layout layout to get pool name from
1376 * \param[out] dest buffer to store pool name in
1377 * \param[in] n size in bytes of buffer \a dest
1379 * \retval 0 on success
1380 * \retval -1 if arguments are invalid
/*
 * Copy the current component's pool name into dest using
 * strncpy(dest, llc_pool_name, n).
 * NOTE(review): strncpy() does not write a terminating NUL when the source
 * is n characters or longer, so a caller buffer shorter than the stored
 * name plus one is left unterminated. The argument checks are not shown in
 * this listing - confirm whether termination is guaranteed elsewhere.
 */
1382 int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
1385 struct llapi_layout_comp *comp;
1387 comp = __llapi_layout_cur_comp(layout);
1396 strncpy(dest, comp->llc_pool_name, n);
1402 * Set the name of the pool of layout \a layout.
1404 * \param[in] layout layout to set pool name in
1405 * \param[in] pool_name pool name to set
1407 * \retval 0 on success
1408 * \retval -1 if arguments are invalid or pool name is too long
/*
 * Store pool_name in the current component. Text up to and including the
 * first '.' located by strchr() is skipped (the guard on ptr is not shown
 * in this listing), and the remaining name is length-checked against
 * LOV_MAXPOOLNAME before the copy. llc_pool_name is declared with
 * LOV_MAXPOOLNAME + 1 bytes, so a name that passes the length check leaves
 * room for strncpy() to write the terminating NUL.
 */
1410 int llapi_layout_pool_name_set(struct llapi_layout *layout,
1411 const char *pool_name)
1413 struct llapi_layout_comp *comp;
1416 comp = __llapi_layout_cur_comp(layout);
1420 if (pool_name == NULL) {
1425 /* Strip off any 'fsname.' portion. */
1426 ptr = strchr(pool_name, '.');
1428 pool_name = ptr + 1;
1430 if (strlen(pool_name) > LOV_MAXPOOLNAME) {
1435 strncpy(comp->llc_pool_name, pool_name, sizeof(comp->llc_pool_name));
1441 * Open and possibly create a file with a given \a layout.
1443 * If \a layout is NULL this function acts as a simple wrapper for
1444 * open(). By convention, ENOTTY is returned in errno if \a path
1445 * refers to a non-Lustre file.
1447 * \param[in] path name of the file to open
1448 * \param[in] open_flags open() flags
1449 * \param[in] mode permissions to create file, filtered by umask
1450 * \param[in] layout layout to create new file with
1452 * \retval non-negative file descriptor on successful open
1453 * \retval -1 if an error occurred
/*
 * Open path and, when layout is non-NULL, apply the layout to the open
 * file by writing it as the XATTR_LUSTRE_LOV extended attribute.
 * NOTE(review): release of lum and closing of fd on failure are not shown
 * in this listing - confirm both happen on every error path.
 */
1455 int llapi_layout_file_open(const char *path, int open_flags, mode_t mode,
1456 const struct llapi_layout *layout)
1461 struct lov_user_md *lum;
1465 (layout != NULL && layout->llot_magic != LLAPI_LAYOUT_MAGIC)) {
1470 /* Object creation must be postponed until after layout attributes
1471 * have been applied. */
1472 if (layout != NULL && (open_flags & O_CREAT))
1473 open_flags |= O_LOV_DELAY_CREATE;
1475 fd = open(path, open_flags, mode);
/* With no layout, or when open() failed, there is nothing to apply. */
1477 if (layout == NULL || fd < 0)
1480 lum = llapi_layout_to_lum(layout);
/* The xattr payload size depends on the magic: composite layouts carry
 * their own lcm_size, other magics are sized via lov_user_md_size(). */
1489 if (lum->lmm_magic == LOV_USER_MAGIC_COMP_V1)
1490 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
1491 else if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC)
1492 lum_size = lov_user_md_size(lum->lmm_stripe_count,
1495 lum_size = lov_user_md_size(0, lum->lmm_magic);
1497 rc = fsetxattr(fd, XATTR_LUSTRE_LOV, lum, lum_size, 0);
/* EOPNOTSUPP is reported to the caller as ENOTTY; any other errno value is
 * left unchanged. */
1506 errno = errno == EOPNOTSUPP ? ENOTTY : errno;
1512 * Create a file with a given \a layout.
1514 * Force O_CREAT and O_EXCL flags on so caller is assured that file was
1515 * created with the given \a layout on successful function return.
1517 * \param[in] path name of the file to open
1518 * \param[in] open_flags open() flags
1519 * \param[in] mode permissions to create new file with
1520 * \param[in] layout layout to create new file with
1522 * \retval non-negative file descriptor on successful open
1523 * \retval -1 if an error occurred
/*
 * Thin wrapper: forwards to llapi_layout_file_open() with O_CREAT and
 * O_EXCL OR-ed into open_flags.
 * NOTE(review): mode is declared int here but mode_t in
 * llapi_layout_file_open(); the value is converted implicitly at the call.
 */
1525 int llapi_layout_file_create(const char *path, int open_flags, int mode,
1526 const struct llapi_layout *layout)
1528 return llapi_layout_file_open(path, open_flags|O_CREAT|O_EXCL, mode,
/*
 * Copy the layout header flags (llot_flags) into *flags after checking the
 * layout magic.
 * NOTE(review): no NULL check of layout or flags is visible before they are
 * dereferenced - confirm callers never pass NULL.
 */
1532 int llapi_layout_flags_get(struct llapi_layout *layout, uint32_t *flags)
1534 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1539 *flags = layout->llot_flags;
1544 * Set flags to the header of a component layout.
/*
 * Overwrite the layout header flags (llot_flags) with flags after checking
 * the layout magic. The previous value is replaced, not OR-ed.
 * NOTE(review): layout is dereferenced without a visible NULL check.
 */
1546 int llapi_layout_flags_set(struct llapi_layout *layout, uint32_t flags)
1548 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1553 layout->llot_flags = flags;
/*
 * Map layout header flags to a string by switching on
 * (flags & LCM_FL_FLR_MASK). Only the LCM_FL_WRITE_PENDING and
 * LCM_FL_SYNC_PENDING case labels are visible in this listing; the other
 * labels and the returned strings are not shown.
 */
1557 const char *llapi_layout_flags_string(uint32_t flags)
1559 switch (flags & LCM_FL_FLR_MASK) {
1562 case LCM_FL_WRITE_PENDING:
1564 case LCM_FL_SYNC_PENDING:
/*
 * Translate the keywords "ro", "wp" and "sp" into LCM_FL_RDONLY,
 * LCM_FL_WRITE_PENDING and LCM_FL_SYNC_PENDING respectively.
 * NOTE(review): each comparison is limited to strlen(string) characters, so
 * any prefix of a keyword matches. An empty string compares equal over zero
 * characters and therefore yields LCM_FL_RDONLY, as does "r"; "w" yields
 * LCM_FL_WRITE_PENDING and "s" yields LCM_FL_SYNC_PENDING. An exact match
 * would need strcmp().
 * NOTE(review): the const qualifier on the by-value return type has no
 * effect, and string is never written through here, so it could be
 * declared const char *.
 */
1571 const __u16 llapi_layout_string_flags(char *string)
1573 if (strncmp(string, "ro", strlen(string)) == 0)
1574 return LCM_FL_RDONLY;
1575 if (strncmp(string, "wp", strlen(string)) == 0)
1576 return LCM_FL_WRITE_PENDING;
1577 if (strncmp(string, "sp", strlen(string)) == 0)
1578 return LCM_FL_SYNC_PENDING;
1584 * llapi_layout_mirror_count_is_valid() - Check the validity of mirror count.
1585 * @count: Mirror count value to be checked.
1587 * This function checks the validity of mirror count.
1589 * Return: true on success or false on failure.
/*
 * Return true when count does not exceed LUSTRE_MIRROR_COUNT_MAX.
 * NOTE(review): count is uint16_t, so the count >= 0 half of the test is
 * always true; only the upper bound has any effect.
 */
1591 static bool llapi_layout_mirror_count_is_valid(uint16_t count)
1593 return count >= 0 && count <= LUSTRE_MIRROR_COUNT_MAX;
1597 * llapi_layout_mirror_count_get() - Get mirror count from the header of
1599 * @layout: Layout to get mirror count from.
1600 * @count: Returned mirror count value.
1602 * This function gets mirror count from the header of a layout.
1604 * Return: 0 on success or -1 on failure.
/*
 * Copy the header mirror count (llot_mirror_count) into *count after
 * checking the layout magic.
 * NOTE(review): layout and count are dereferenced without a visible NULL
 * check.
 */
1606 int llapi_layout_mirror_count_get(struct llapi_layout *layout,
1609 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1614 *count = layout->llot_mirror_count;
1619 * llapi_layout_mirror_count_set() - Set mirror count to the header of a layout.
1620 * @layout: Layout to set mirror count in.
1621 * @count: Mirror count value to be set.
1623 * This function sets mirror count to the header of a layout.
1625 * Return: 0 on success or -1 on failure.
/*
 * Store count in the header mirror count (llot_mirror_count). The layout
 * magic is checked first, then count is validated with
 * llapi_layout_mirror_count_is_valid().
 * NOTE(review): layout is dereferenced without a visible NULL check.
 */
1627 int llapi_layout_mirror_count_set(struct llapi_layout *layout,
1630 if (layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
1635 if (!llapi_layout_mirror_count_is_valid(count)) {
1640 layout->llot_mirror_count = count;
1645 * Fetch the start and end offset of the current layout component.
1647 * \param[in] layout the layout component
1648 * \param[out] start extent start, inclusive
1649 * \param[out] end extent end, exclusive
1651 * \retval 0 on success
1652 * \retval <0 if error occurs
/*
 * Copy the current component's extent into *start (llc_extent.e_start) and
 * *end (llc_extent.e_end). Both output pointers are tested against NULL
 * before being written.
 */
1654 int llapi_layout_comp_extent_get(const struct llapi_layout *layout,
1655 uint64_t *start, uint64_t *end)
1657 struct llapi_layout_comp *comp;
1659 comp = __llapi_layout_cur_comp(layout);
1663 if (start == NULL || end == NULL) {
1668 *start = comp->llc_extent.e_start;
1669 *end = comp->llc_extent.e_end;
1675 * Set the layout extent of a layout.
1677 * \param[in] layout the layout to be set
1678 * \param[in] start extent start, inclusive
1679 * \param[in] end extent end, exclusive
1681 * \retval 0 on success
1682 * \retval <0 if error occurs
/*
 * Set the extent of the current component to [start, end) and mark the
 * layout as composite. Before storing, the new extent is compared with the
 * neighbouring components in llot_comp_list; the bodies of the failing
 * branches are not shown in this listing (presumably error returns).
 */
1684 int llapi_layout_comp_extent_set(struct llapi_layout *layout,
1685 uint64_t start, uint64_t end)
1687 struct llapi_layout_comp *prev, *next, *comp;
1689 comp = __llapi_layout_cur_comp(layout);
1699 * We need to make sure the extent to be set is valid: the new
1700 * extent must be adjacent with the prev & next component.
/* A previous component exists when prev does not point at the list head;
 * a start of 0 is exempt from the adjacency test. */
1702 if (comp->llc_list.prev != &layout->llot_comp_list) {
1703 prev = list_entry(comp->llc_list.prev, typeof(*prev),
1705 if (start != 0 && start != prev->llc_extent.e_end) {
/* Likewise for the following component; a next component whose e_start is
 * 0 is exempt from the adjacency test. */
1711 if (comp->llc_list.next != &layout->llot_comp_list) {
1712 next = list_entry(comp->llc_list.next, typeof(*next),
1714 if (next->llc_extent.e_start != 0 &&
1715 end != next->llc_extent.e_start) {
1721 comp->llc_extent.e_start = start;
1722 comp->llc_extent.e_end = end;
1723 layout->llot_is_composite = true;
1729 * Gets the attribute flags of the current component.
1731 * \param[in] layout the layout component
1732 * \param[out] flags stored the returned component flags
1734 * \retval 0 on success
1735 * \retval <0 if error occurs
/*
 * Copy the current component's flags (llc_flags) into *flags. flags is
 * tested against NULL before being written.
 */
1737 int llapi_layout_comp_flags_get(const struct llapi_layout *layout,
1740 struct llapi_layout_comp *comp;
1742 comp = __llapi_layout_cur_comp(layout);
1746 if (flags == NULL) {
1751 *flags = comp->llc_flags;
1757 * Sets the specified flags of the current component leaving other flags as-is.
1759 * \param[in] layout the layout component
1760 * \param[in] flags component flags to be set
1762 * \retval 0 on success
1763 * \retval <0 if error occurs
/*
 * OR flags into the current component's llc_flags; bits that are already
 * set stay set.
 */
1765 int llapi_layout_comp_flags_set(struct llapi_layout *layout, uint32_t flags)
1767 struct llapi_layout_comp *comp;
1769 comp = __llapi_layout_cur_comp(layout);
1773 comp->llc_flags |= flags;
1779 * Clears the flags specified in the flags leaving other flags as-is.
1781 * \param[in] layout the layout component
1782 * \param[in] flags component flags to be cleared
1784 * \retval 0 on success
1785 * \retval <0 if error occurs
/*
 * Clear the bits given in flags from the current component's llc_flags;
 * all other bits are left unchanged.
 */
1787 int llapi_layout_comp_flags_clear(struct llapi_layout *layout,
1790 struct llapi_layout_comp *comp;
1792 comp = __llapi_layout_cur_comp(layout);
1796 comp->llc_flags &= ~flags;
1802 * Fetches the file-unique component ID of the current layout component.
1804 * \param[in] layout the layout component
1805 * \param[out] id stored the returned component ID
1807 * \retval 0 on success
1808 * \retval <0 if error occurs
/*
 * Component-ID getter for the current component.
 * NOTE(review): only the component lookup is visible in this listing; the
 * argument check and the store into *id are not shown.
 */
1810 int llapi_layout_comp_id_get(const struct llapi_layout *layout, uint32_t *id)
1812 struct llapi_layout_comp *comp;
1814 comp = __llapi_layout_cur_comp(layout);
1828 * Return the mirror id of the current layout component.
1830 * \param[in] layout the layout component
1831 * \param[out] id stored the returned mirror ID
1833 * \retval 0 on success
1834 * \retval <0 if error occurs
/*
 * Store in *id the mirror ID that mirror_id_of() derives from the current
 * component's llc_id. Argument checks are not shown in this listing.
 */
1836 int llapi_layout_mirror_id_get(const struct llapi_layout *layout, uint32_t *id)
1838 struct llapi_layout_comp *comp;
1840 comp = __llapi_layout_cur_comp(layout);
1849 *id = mirror_id_of(comp->llc_id);
1855 * Adds a component to \a layout, the new component will be added to
1856 * the tail of components list and it'll inherit attributes of existing
1857 * ones. The \a layout will change its current component pointer to
1858 * the newly added component, and it'll be turned into a composite
1859 * layout if it was not before the adding.
1861 * \param[in] layout existing composite or plain layout
1863 * \retval 0 on success
1864 * \retval <0 if error occurs
/*
 * Allocate a new component, start its extent where the last component in
 * llot_comp_list ends, append it to the list, make it the current component
 * and mark the layout as composite.
 * NOTE(review): no copying of attributes from an existing component is
 * visible in this listing - confirm where the inheritance described in the
 * function documentation takes place.
 */
1866 int llapi_layout_comp_add(struct llapi_layout *layout)
1868 struct llapi_layout_comp *last, *comp, *new;
1870 comp = __llapi_layout_cur_comp(layout);
1874 new = __llapi_comp_alloc(0);
1878 last = list_entry(layout->llot_comp_list.prev, typeof(*last),
/* The freshly allocated component must end beyond the current tail,
 * otherwise it is released again. */
1881 if (new->llc_extent.e_end <= last->llc_extent.e_end) {
1882 __llapi_comp_free(new);
1886 new->llc_extent.e_start = last->llc_extent.e_end;
1888 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1889 layout->llot_cur_comp = new;
1890 layout->llot_is_composite = true;
1895 * Adds a first component of a mirror to \a layout.
1896 * The \a layout will change its current component pointer to
1897 * the newly added component, and it'll be turned into a composite
1898 * layout if it was not before the adding.
1900 * \param[in] layout existing composite or plain layout
1902 * \retval 0 on success
1903 * \retval <0 if error occurs
/*
 * Allocate a new component whose extent starts at offset 0, append it to
 * llot_comp_list, make it the current component and mark the layout as
 * composite. Unlike llapi_layout_comp_add(), the start is not taken from
 * the end of the previous tail component.
 */
1905 int llapi_layout_add_first_comp(struct llapi_layout *layout)
1907 struct llapi_layout_comp *comp, *new;
1909 comp = __llapi_layout_cur_comp(layout);
1913 new = __llapi_comp_alloc(0);
1917 new->llc_extent.e_start = 0;
1919 list_add_tail(&new->llc_list, &layout->llot_comp_list);
1920 layout->llot_cur_comp = new;
1921 layout->llot_is_composite = true;
1927 * Deletes current component from the composite layout. The component
1928 * to be deleted must be the tail of components list, and it can't be
1929 * the only component in the layout.
1931 * \param[in] layout composite layout
1933 * \retval 0 on success
1934 * \retval <0 if error occurs
/*
 * Unlink and free the current component, then make its predecessor the
 * current component. The layout must be composite, and the component must
 * be the tail of llot_comp_list but not its only entry; the bodies of the
 * rejecting branches are not shown in this listing.
 */
1936 int llapi_layout_comp_del(struct llapi_layout *layout)
1938 struct llapi_layout_comp *comp;
1940 comp = __llapi_layout_cur_comp(layout);
1944 if (!layout->llot_is_composite) {
1949 /* It must be the tail of the list (for PFL, can be relaxed
1950 * once we get mirrored components) */
1951 if (comp->llc_list.next != &layout->llot_comp_list) {
1955 /* It can't be the only one on the list */
1956 if (comp->llc_list.prev == &layout->llot_comp_list) {
/* Retarget the current-component pointer before comp is unlinked and
 * freed, so it never refers to released memory. */
1961 layout->llot_cur_comp =
1962 list_entry(comp->llc_list.prev, typeof(*comp), llc_list);
1963 list_del_init(&comp->llc_list);
1964 __llapi_comp_free(comp);
1970 * Move the current component pointer to the component with
1971 * specified component ID.
1973 * \param[in] layout composite layout
1974 * \param[in] id component ID
1976 * \retval =0 : moved successfully
1977 * \retval <0 if error occurs
/*
 * Walk llot_comp_list and make the component whose llc_id equals comp_id
 * the current component. The layout must be composite, and LCME_ID_INVAL
 * is tested for before the search. What is returned when no component
 * matches is not shown in this listing.
 */
1979 int llapi_layout_comp_use_id(struct llapi_layout *layout, uint32_t comp_id)
1981 struct llapi_layout_comp *comp;
1983 comp = __llapi_layout_cur_comp(layout);
1985 return -1; /* use previously set errno */
1987 if (!layout->llot_is_composite) {
1992 if (comp_id == LCME_ID_INVAL) {
1997 list_for_each_entry(comp, &layout->llot_comp_list, llc_list) {
1998 if (comp->llc_id == comp_id) {
1999 layout->llot_cur_comp = comp;
2008 * Move the current component pointer to a specified position.
2010 * \param[in] layout composite layout
2011 * \param[in] pos the position to be moved, it can be:
2012 * LLAPI_LAYOUT_COMP_USE_FIRST: use first component
2013 * LLAPI_LAYOUT_COMP_USE_LAST: use last component
2014 * LLAPI_LAYOUT_COMP_USE_NEXT: use component after current
2015 * LLAPI_LAYOUT_COMP_USE_PREV: use component before current
2017 * \retval =0 : moved successfully
2018 * \retval =1 : at last component with NEXT, at first component with PREV
2019 * \retval <0 if error occurs
/*
 * Move the current-component pointer according to pos. head and tail are
 * the first and last entries of llot_comp_list. FIRST and LAST select them
 * directly; NEXT and PREV step along llc_list from the current component.
 * The end-of-list tests for NEXT and PREV, and the handling of a
 * non-composite layout, are only partly visible in this listing.
 */
2021 int llapi_layout_comp_use(struct llapi_layout *layout,
2022 enum llapi_layout_comp_use pos)
2024 struct llapi_layout_comp *comp, *head, *tail;
2026 comp = __llapi_layout_cur_comp(layout);
2030 if (!layout->llot_is_composite) {
2031 if (pos == LLAPI_LAYOUT_COMP_USE_FIRST ||
2032 pos == LLAPI_LAYOUT_COMP_USE_LAST)
2038 head = list_entry(layout->llot_comp_list.next, typeof(*head), llc_list);
2039 tail = list_entry(layout->llot_comp_list.prev, typeof(*tail), llc_list);
2041 case LLAPI_LAYOUT_COMP_USE_FIRST:
2042 layout->llot_cur_comp = head;
2044 case LLAPI_LAYOUT_COMP_USE_NEXT:
2049 layout->llot_cur_comp = list_entry(comp->llc_list.next,
2050 typeof(*comp), llc_list);
2052 case LLAPI_LAYOUT_COMP_USE_LAST:
2053 layout->llot_cur_comp = tail;
2055 case LLAPI_LAYOUT_COMP_USE_PREV:
2060 layout->llot_cur_comp = list_entry(comp->llc_list.prev,
2061 typeof(*comp), llc_list);
2072 * Add layout component(s) to an existing file.
2074 * \param[in] path The path name of the file
2075 * \param[in] layout The layout component(s) to be added
/*
 * Convert layout with llapi_layout_to_lum(), require the result to carry
 * LOV_USER_MAGIC_COMP_V1, open path with O_RDWR and write the layout to
 * the XATTR_LUSTRE_LOV".add" extended attribute. The payload size is taken
 * from the composite header's lcm_size.
 * NOTE(review): cleanup of lum and fd is not shown in this listing.
 */
2077 int llapi_layout_file_comp_add(const char *path,
2078 const struct llapi_layout *layout)
2080 int rc, fd, lum_size, tmp_errno = 0;
2081 struct lov_user_md *lum;
2083 if (path == NULL || layout == NULL ||
2084 layout->llot_magic != LLAPI_LAYOUT_MAGIC) {
2089 lum = llapi_layout_to_lum(layout);
2093 if (lum->lmm_magic != LOV_USER_MAGIC_COMP_V1) {
2098 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2100 fd = open(path, O_RDWR);
2107 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".add", lum, lum_size, 0);
2122 * Delete component(s) by the specified component id or component flags
2123 * from an existing file.
2125 * \param[in] path path name of the file
2126 * \param[in] id unique component ID
2127 * \param[in] flags flags: LCME_FL_* or;
2128 * negative flags: (LCME_FL_NEG|LCME_FL_*)
/*
 * Build a temporary layout covering [0, LUSTRE_EOF) with the requested
 * flags, convert it with llapi_layout_to_lum() and write it to the
 * XATTR_LUSTRE_LOV".del" extended attribute of path. The temporary layout
 * is released with llapi_layout_free() on the visible exit paths.
 * NOTE(review): the return value of llapi_layout_comp_extent_set() is
 * ignored; a failure there is only detectable indirectly.
 * NOTE(review): the statement that stores id in the component is not shown
 * in this listing.
 */
2130 int llapi_layout_file_comp_del(const char *path, uint32_t id, uint32_t flags)
2132 int rc, fd, lum_size;
2133 struct llapi_layout *layout;
2134 struct llapi_layout_comp *comp;
2135 struct lov_user_md *lum;
2137 if (path == NULL || id > LCME_ID_MAX || (flags & ~LCME_KNOWN_FLAGS)) {
2142 /* Can only specify ID or flags, not both. */
2143 if (id != 0 && flags != 0) {
2148 layout = llapi_layout_alloc();
2152 llapi_layout_comp_extent_set(layout, 0, LUSTRE_EOF);
2153 comp = __llapi_layout_cur_comp(layout);
2155 llapi_layout_free(layout);
2160 comp->llc_flags = flags;
2162 lum = llapi_layout_to_lum(layout);
2164 llapi_layout_free(layout);
2167 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2169 fd = open(path, O_RDWR);
2175 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".del", lum, lum_size, 0);
2177 int tmp_errno = errno;
2186 llapi_layout_free(layout);
2191 * Change the flags of one or more components of an existing file, selecting
2192 * each component by its component ID. Every entry of @ids must be a unique
2193 * component ID; the entry at the same position in @flags gives the flags
2194 * (or negative flags, LCME_FL_NEG|LCME_FL_*) to apply to that component.
2195 * Both arrays hold @count entries.
2197 * \param[in] path path name of the file
2198 * \param[in] ids An array of component IDs
2199 * \param[in] flags flags: LCME_FL_* or;
2200 * negative flags: (LCME_FL_NEG|LCME_FL_*)
2201 * \param[in] count Number of elements in ids and flags array
/*
 * Apply per-component flags to an existing file. Every (ids[i], flags[i])
 * pair is validated first, then a temporary composite layout with one
 * component per pair is built, converted with llapi_layout_to_lum() and
 * written to the XATTR_LUSTRE_LOV".set.flags" extended attribute after
 * llapi_file_flush() has been called on the file descriptor.
 * NOTE(review): checks of path, ids, flags and count against NULL or zero
 * are not shown in this listing.
 */
2203 int llapi_layout_file_comp_set(const char *path, uint32_t *ids, uint32_t *flags,
2206 int rc = -1, fd = -1, i;
2208 struct llapi_layout *layout;
2209 struct llapi_layout_comp *comp;
2210 struct lov_user_md *lum = NULL;
/* Validation pass: both the ID and the flags of every entry must be
 * non-zero and within the known ranges. */
2220 for (i = 0; i < count; i++) {
2221 if (!ids[i] || !flags[i]) {
2226 if (ids[i] > LCME_ID_MAX || (flags[i] & ~LCME_KNOWN_FLAGS)) {
2231 /* do not allow to set or clear INIT flag */
2232 if (flags[i] & LCME_FL_INIT) {
2238 layout = __llapi_layout_alloc();
2242 layout->llot_is_composite = true;
/* Build pass: one component per (id, flags) pair, appended in input
 * order. */
2243 for (i = 0; i < count; i++) {
2244 comp = __llapi_comp_alloc(0);
2248 comp->llc_id = ids[i];
2249 comp->llc_flags = flags[i];
2251 list_add_tail(&comp->llc_list, &layout->llot_comp_list);
2252 layout->llot_cur_comp = comp;
2255 lum = llapi_layout_to_lum(layout);
2259 lum_size = ((struct lov_comp_md_v1 *)lum)->lcm_size;
2261 fd = open(path, O_RDWR);
2265 /* flush cached pages from clients */
2266 rc = llapi_file_flush(fd);
2273 rc = fsetxattr(fd, XATTR_LUSTRE_LOV".set.flags", lum, lum_size, 0);
2281 int tmp_errno = errno;
2288 llapi_layout_free(layout);
2293 * Check if the file layout is composite.
2295 * \param[in] layout the file layout to check
2297 * \retval true composite
2298 * \retval false not composite
/*
 * Return the layout's llot_is_composite flag.
 * NOTE(review): layout is dereferenced without a NULL or magic check.
 */
2300 bool llapi_layout_is_composite(struct llapi_layout *layout)
2302 return layout->llot_is_composite;
2306 * Iterate over every component in the @layout and call callback function @cb.
2308 * \param[in] layout component layout list.
2309 * \param[in] cb callback for each component
2310 * \param[in] cbdata callback data
2312 * \retval < 0 error happens during the iteration
2313 * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error
2314 * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration
/*
 * Position the layout on its first component, then repeatedly invoke
 * cb(layout, cbdata) and advance with LLAPI_LAYOUT_COMP_USE_NEXT. A
 * callback result other than LLAPI_LAYOUT_ITER_CONT ends the walk; a
 * result of 1 from the NEXT step means the last component was reached and
 * LLAPI_LAYOUT_ITER_CONT is returned.
 * The current-component pointer of layout is moved by this function.
 */
2316 int llapi_layout_comp_iterate(struct llapi_layout *layout,
2317 llapi_layout_iter_cb cb, void *cbdata)
2321 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2326 * make sure on success llapi_layout_comp_use() API returns 0 with
2332 rc = cb(layout, cbdata);
2333 if (rc != LLAPI_LAYOUT_ITER_CONT)
2336 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2339 else if (rc == 1) /* reached the last comp */
2340 return LLAPI_LAYOUT_ITER_CONT;
2347 * llapi_layout_merge() - Merge a composite layout into another one.
2348 * @dst_layout: Destination composite layout.
2349 * @src_layout: Source composite layout.
2351 * This function copies all of the components from @src_layout and
2352 * appends them to @dst_layout.
2354 * Return: 0 on success or -1 on failure.
/*
 * Append a copy of every component of src_layout to *dst_layout. When
 * *dst_layout is NULL a new layout is allocated with
 * __llapi_layout_alloc(). For each source component the pattern, stripe
 * size/count/offset, pool name, per-stripe OST indices, extent, ID and
 * flags are copied; the copy becomes the destination's current component.
 * NOTE(review): llc_timestamp is not copied by any visible statement.
 * NOTE(review): the llapi_layout_free(new_layout) near the end looks like
 * the error exit. When *dst_layout was non-NULL on entry, new_layout is
 * the caller's own layout, so that call would free it while *dst_layout
 * still points at it - confirm against the full source.
 */
2356 int llapi_layout_merge(struct llapi_layout **dst_layout,
2357 const struct llapi_layout *src_layout)
2359 struct llapi_layout *new_layout = *dst_layout;
2360 struct llapi_layout_comp *new = NULL;
2361 struct llapi_layout_comp *comp = NULL;
2364 if (src_layout == NULL ||
2365 list_empty((struct list_head *)&src_layout->llot_comp_list))
2368 if (new_layout == NULL) {
2369 new_layout = __llapi_layout_alloc();
2370 if (new_layout == NULL) {
2376 list_for_each_entry(comp, &src_layout->llot_comp_list, llc_list) {
2377 new = __llapi_comp_alloc(0);
2383 new->llc_pattern = comp->llc_pattern;
2384 new->llc_stripe_size = comp->llc_stripe_size;
2385 new->llc_stripe_count = comp->llc_stripe_count;
2386 new->llc_stripe_offset = comp->llc_stripe_offset;
2388 if (comp->llc_pool_name[0] != '\0')
2389 strncpy(new->llc_pool_name, comp->llc_pool_name,
2390 sizeof(new->llc_pool_name));
/* Copy the per-stripe OST indices; the object array is resized on each
 * iteration via stripe_number_roundup(i). */
2392 for (i = 0; i < comp->llc_objects_count; i++) {
2393 if (__llapi_comp_objects_realloc(new,
2394 stripe_number_roundup(i)) < 0) {
2396 __llapi_comp_free(new);
2399 new->llc_objects[i].l_ost_idx = \
2400 comp->llc_objects[i].l_ost_idx;
2403 new->llc_objects_count = comp->llc_objects_count;
2404 new->llc_extent.e_start = comp->llc_extent.e_start;
2405 new->llc_extent.e_end = comp->llc_extent.e_end;
2406 new->llc_id = comp->llc_id;
2407 new->llc_flags = comp->llc_flags;
2409 list_add_tail(&new->llc_list, &new_layout->llot_comp_list);
2410 new_layout->llot_cur_comp = new;
2412 new_layout->llot_is_composite = true;
2414 *dst_layout = new_layout;
2417 llapi_layout_free(new_layout);
2422 * Find all stale components.
2424 * \param[in] layout component layout list.
2425 * \param[out] comp array of stale component info.
2426 * \param[in] comp_size array size of @comp.
2427 * \param[in] mirror_ids array of mirror IDs; only components
2428 * belonging to these mirrors will be collected.
2429 * \param[in] ids_nr number of entries in the mirror_ids array.
2431 * \retval number of component info entries collected on success or
2432 * an error code on failure.
/*
 * Walk the components of layout from the first one and record every
 * component carrying LCME_FL_STALE in comp[]: its component ID, mirror ID
 * and extent start/end. When mirror IDs are supplied, the mirror_ids[]
 * loop looks for the component's mirror ID among them; otherwise the
 * LCME_FL_NOSYNC branch applies. Returns rc when it is negative, else the
 * number of entries stored (idx).
 * The current-component pointer of layout is moved by this function.
 * NOTE(review): comp[idx] is written before the idx >= comp_size test, so
 * with comp_size == 0 the first store would land outside the array -
 * confirm callers always pass comp_size >= 1. The increment of idx is not
 * shown in this listing.
 */
2434 int llapi_mirror_find_stale(struct llapi_layout *layout,
2435 struct llapi_resync_comp *comp, size_t comp_size,
2436 __u16 *mirror_ids, int ids_nr)
2441 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2449 uint64_t start, end;
2451 rc = llapi_layout_comp_flags_get(layout, &flags);
2455 if (!(flags & LCME_FL_STALE))
2458 rc = llapi_layout_mirror_id_get(layout, &mirror_id);
2462 /* the caller only wants stale components from specific
2467 for (j = 0; j < ids_nr; j++) {
2468 if (mirror_ids[j] == mirror_id)
2472 /* not in the specified mirror */
2475 } else if (flags & LCME_FL_NOSYNC) {
2476 /* if not specified mirrors, do not resync "nosync"
2481 rc = llapi_layout_comp_id_get(layout, &id);
2485 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2489 /* pack this component into @comp array */
2490 comp[idx].lrc_id = id;
2491 comp[idx].lrc_mirror_id = mirror_id;
2492 comp[idx].lrc_start = start;
2493 comp[idx].lrc_end = end;
2496 if (idx >= comp_size) {
2502 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2509 return rc < 0 ? rc : idx;
2512 /* locate @layout to a valid component covering file [file_start, file_end) */
/*
 * Walk the components of layout from the first one, skipping those flagged
 * LCME_FL_STALE, and look for a component whose extent contains
 * file_start. On a match file_start and *endp are advanced to the
 * component's end, and the end >= file_end test follows. The mirror ID of
 * each component is read into rid and compared with mirror_id.
 * The current-component pointer of layout is moved by this function.
 * NOTE(review): the statements that assign mirror_id and the return
 * statements are not shown in this listing, so the exact result for a
 * partially covered range cannot be stated from here.
 */
2513 uint32_t llapi_mirror_find(struct llapi_layout *layout,
2514 uint64_t file_start, uint64_t file_end,
2517 uint32_t mirror_id = 0;
2520 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
2526 uint64_t start, end;
2527 uint32_t flags, id, rid;
2529 rc = llapi_layout_comp_flags_get(layout, &flags);
2533 if (flags & LCME_FL_STALE)
2536 rc = llapi_layout_mirror_id_get(layout, &rid);
2540 rc = llapi_layout_comp_id_get(layout, &id);
2544 rc = llapi_layout_comp_extent_get(layout, &start, &end);
2548 if (file_start >= start && file_start < end) {
2551 else if (mirror_id != rid || *endp != start)
2554 file_start = *endp = end;
2555 if (end >= file_end)
2560 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
2568 int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
2569 struct llapi_resync_comp *comp_array,
2570 int comp_size, uint64_t start, uint64_t end)
2573 size_t page_size = sysconf(_SC_PAGESIZE);
2574 const size_t buflen = 4 << 20; /* 4M */
2576 uint64_t pos = start;
2580 rc = posix_memalign(&buf, page_size, buflen);
2584 if (end == OBD_OBJECT_EOF)
2585 count = OBD_OBJECT_EOF;
2587 count = end - start;
2591 uint64_t mirror_end = 0;
2596 src = llapi_mirror_find(layout, pos, end, &mirror_end);
2600 if (mirror_end == OBD_OBJECT_EOF) {
2603 to_read = MIN(count, mirror_end - pos);
2604 to_read = (to_read + page_size - 1) & ~(page_size - 1);
2606 to_read = MIN(buflen, to_read);
2608 bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
2609 if (bytes_read == 0) {
2613 if (bytes_read < 0) {
2618 /* round up to page align to make direct IO happy. */
2619 to_write = (bytes_read + page_size - 1) & ~(page_size - 1);
2621 for (i = 0; i < comp_size; i++) {
2624 size_t to_write2 = to_write;
2626 /* skip non-overlapped component */
2627 if (pos >= comp_array[i].lrc_end ||
2628 pos + to_write <= comp_array[i].lrc_start)
2631 if (pos < comp_array[i].lrc_start)
2632 pos2 = comp_array[i].lrc_start;
2634 to_write2 -= pos2 - pos;
2636 if ((pos + to_write) > comp_array[i].lrc_end)
2637 to_write2 -= pos + to_write -
2638 comp_array[i].lrc_end;
2640 written = llapi_mirror_write(fd,
2641 comp_array[i].lrc_mirror_id,
2646 * this component is not written successfully,
2647 * mark it using its lrc_synced, it is supposed
2648 * to be false before getting here.
2650 * And before this function returns, all
2651 * elements of comp_array will reverse their
2652 * lrc_synced flag to reflect their true
2655 comp_array[i].lrc_synced = true;
2658 assert(written == to_write2);
2662 count -= bytes_read;
2668 for (i = 0; i < comp_size; i++)
2669 comp_array[i].lrc_synced = false;
2673 for (i = 0; i < comp_size; i++) {
2674 comp_array[i].lrc_synced = !comp_array[i].lrc_synced;
2675 if (comp_array[i].lrc_synced && pos & (page_size - 1)) {
2676 rc = llapi_mirror_truncate(fd,
2677 comp_array[i].lrc_mirror_id, pos);
2679 comp_array[i].lrc_synced = false;
2683 /* partially successful is successful */